1/*
2******************************************************************************
3*
4*   Copyright (C) 1999-2010, International Business Machines
5*   Corporation and others.  All Rights Reserved.
6*
7******************************************************************************
8*   file name:  ubidi.h
9*   encoding:   US-ASCII
10*   tab size:   8 (not used)
11*   indentation:4
12*
13*   created on: 1999jul27
14*   created by: Markus W. Scherer, updated by Matitiahu Allouche
15*/
16
17#ifndef UBIDI_H
18#define UBIDI_H
19
20#include "unicode/utypes.h"
21#include "unicode/uchar.h"
22#include "unicode/localpointer.h"
23
24/**
25 *\file
26 * \brief C API: Bidi algorithm
27 *
28 * <h2>Bidi algorithm for ICU</h2>
29 *
30 * This is an implementation of the Unicode Bidirectional algorithm.
31 * The algorithm is defined in the
32 * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Annex #9</a>,
33 * version 13, also described in The Unicode Standard, Version 4.0.<p>
34 *
35 * Note: Libraries that perform a bidirectional algorithm and
36 * reorder strings accordingly are sometimes called "Storage Layout Engines".
37 * ICU's Bidi and shaping (u_shapeArabic()) APIs can be used at the core of such
38 * "Storage Layout Engines".
39 *
40 * <h3>General remarks about the API:</h3>
41 *
42 * In functions with an error code parameter,
43 * the <code>pErrorCode</code> pointer must be valid
44 * and the value that it points to must not indicate a failure before
45 * the function call. Otherwise, the function returns immediately.
46 * After the function call, the value indicates success or failure.<p>
47 *
48 * The &quot;limit&quot; of a sequence of characters is the position just after their
49 * last character, i.e., one more than that position.<p>
50 *
51 * Some of the API functions provide access to &quot;runs&quot;.
52 * Such a &quot;run&quot; is defined as a sequence of characters
53 * that are at the same embedding level
54 * after performing the Bidi algorithm.<p>
55 *
56 * @author Markus W. Scherer
57 * @version 1.0
58 *
59 *
60 * <h4> Sample code for the ICU Bidi API </h4>
61 *
62 * <h5>Rendering a paragraph with the ICU Bidi API</h5>
63 *
64 * This is (hypothetical) sample code that illustrates
65 * how the ICU Bidi API could be used to render a paragraph of text.
66 * Rendering code depends highly on the graphics system,
67 * therefore this sample code must make a lot of assumptions,
68 * which may or may not match any existing graphics system's properties.
69 *
70 * <p>The basic assumptions are:</p>
71 * <ul>
72 * <li>Rendering is done from left to right on a horizontal line.</li>
73 * <li>A run of single-style, unidirectional text can be rendered at once.</li>
74 * <li>Such a run of text is passed to the graphics system with
75 *     characters (code units) in logical order.</li>
76 * <li>The line-breaking algorithm is very complicated
77 *     and locale-dependent -
78 *     and therefore its implementation is omitted from this sample code.</li>
79 * </ul>
80 *
81 * <pre>
82 * \code
83 *#include "unicode/ubidi.h"
84 *
85 *typedef enum {
86 *     styleNormal=0, styleSelected=1,
87 *     styleBold=2, styleItalics=4,
88 *     styleSuper=8, styleSub=16
89 *} Style;
90 *
91 *typedef struct { int32_t limit; Style style; } StyleRun;
92 *
93 *int getTextWidth(const UChar *text, int32_t start, int32_t limit,
94 *                  const StyleRun *styleRuns, int styleRunCount);
95 *
96 * // set *pLimit and *pStyleRunLimit for a line
97 * // from text[start] and from styleRuns[styleRunStart]
98 * // using ubidi_getLogicalRun(para, ...)
99 *void getLineBreak(const UChar *text, int32_t start, int32_t *pLimit,
100 *                  UBiDi *para,
101 *                  const StyleRun *styleRuns, int styleRunStart, int *pStyleRunLimit,
102 *                  int *pLineWidth);
103 *
104 * // render runs on a line sequentially, always from left to right
105 *
106 * // prepare rendering a new line
107 * void startLine(UBiDiDirection textDirection, int lineWidth);
108 *
109 * // render a run of text and advance to the right by the run width
110 * // the text[start..limit-1] is always in logical order
111 * void renderRun(const UChar *text, int32_t start, int32_t limit,
112 *               UBiDiDirection textDirection, Style style);
113 *
114 * // We could compute a cross-product
115 * // from the style runs with the directional runs
116 * // and then reorder it.
117 * // Instead, here we iterate over each run type
118 * // and render the intersections -
119 * // with shortcuts in simple (and common) cases.
120 * // renderParagraph() is the main function.
121 *
122 * // render a directional run with
123 * // (possibly) multiple style runs intersecting with it
124 * void renderDirectionalRun(const UChar *text,
125 *                           int32_t start, int32_t limit,
126 *                           UBiDiDirection direction,
127 *                           const StyleRun *styleRuns, int styleRunCount) {
128 *     int i;
129 *
130 *     // iterate over style runs
131 *     if(direction==UBIDI_LTR) {
132 *         int styleLimit;
133 *
134 *         for(i=0; i<styleRunCount; ++i) {
135 *             styleLimit=styleRuns[i].limit;
136 *             if(start<styleLimit) {
137 *                 if(styleLimit>limit) { styleLimit=limit; }
138 *                 renderRun(text, start, styleLimit,
139 *                           direction, styleRuns[i].style);
140 *                 if(styleLimit==limit) { break; }
141 *                 start=styleLimit;
142 *             }
143 *         }
144 *     } else {
145 *         int styleStart;
146 *
147 *         for(i=styleRunCount-1; i>=0; --i) {
148 *             if(i>0) {
149 *                 styleStart=styleRuns[i-1].limit;
150 *             } else {
151 *                 styleStart=0;
152 *             }
153 *             if(limit>=styleStart) {
154 *                 if(styleStart<start) { styleStart=start; }
155 *                 renderRun(text, styleStart, limit,
156 *                           direction, styleRuns[i].style);
157 *                 if(styleStart==start) { break; }
158 *                 limit=styleStart;
159 *             }
160 *         }
161 *     }
162 * }
163 *
164 * // the line object represents text[start..limit-1]
165 * void renderLine(UBiDi *line, const UChar *text,
166 *                 int32_t start, int32_t limit,
167 *                 const StyleRun *styleRuns, int styleRunCount) {
168 *     UBiDiDirection direction=ubidi_getDirection(line);
169 *     if(direction!=UBIDI_MIXED) {
170 *         // unidirectional
171 *         if(styleRunCount<=1) {
172 *             renderRun(text, start, limit, direction, styleRuns[0].style);
173 *         } else {
174 *             renderDirectionalRun(text, start, limit,
175 *                                  direction, styleRuns, styleRunCount);
176 *         }
177 *     } else {
178 *         // mixed-directional
179 *         int32_t count, i, length;
180 *         UErrorCode errorCode=U_ZERO_ERROR;
181 *
182 *         count=ubidi_countRuns(line, &errorCode);
183 *         if(U_SUCCESS(errorCode)) {
184 *             if(styleRunCount<=1) {
185 *                 Style style=styleRuns[0].style;
186 *
187 *                 // iterate over directional runs
188 *                for(i=0; i<count; ++i) {
189 *                    direction=ubidi_getVisualRun(line, i, &start, &length);
190 *                     renderRun(text, start, start+length, direction, style);
191 *                }
192 *             } else {
193 *                 int32_t j;
194 *
195 *                 // iterate over both directional and style runs
196 *                 for(i=0; i<count; ++i) {
197 *                     direction=ubidi_getVisualRun(line, i, &start, &length);
198 *                     renderDirectionalRun(text, start, start+length,
199 *                                          direction, styleRuns, styleRunCount);
200 *                 }
201 *             }
202 *         }
203 *     }
204 * }
205 *
206 *void renderParagraph(const UChar *text, int32_t length,
207 *                     UBiDiDirection textDirection,
208 *                      const StyleRun *styleRuns, int styleRunCount,
209 *                      int lineWidth,
210 *                      UErrorCode *pErrorCode) {
211 *     UBiDi *para;
212 *
213 *     if(pErrorCode==NULL || U_FAILURE(*pErrorCode) || length<=0) {
214 *         return;
215 *     }
216 *
217 *     para=ubidi_openSized(length, 0, pErrorCode);
218 *     if(para==NULL) { return; }
219 *
220 *     ubidi_setPara(para, text, length,
221 *                   textDirection ? UBIDI_DEFAULT_RTL : UBIDI_DEFAULT_LTR,
222 *                   NULL, pErrorCode);
223 *     if(U_SUCCESS(*pErrorCode)) {
224 *         UBiDiLevel paraLevel=1&ubidi_getParaLevel(para);
225 *         StyleRun styleRun={ length, styleNormal };
226 *         int width;
227 *
228 *         if(styleRuns==NULL || styleRunCount<=0) {
229 *            styleRunCount=1;
230 *             styleRuns=&styleRun;
231 *         }
232 *
233 *        // assume styleRuns[styleRunCount-1].limit>=length
234 *
235 *         width=getTextWidth(text, 0, length, styleRuns, styleRunCount);
236 *         if(width<=lineWidth) {
237 *             // everything fits onto one line
238 *
239 *            // prepare rendering a new line from either left or right
240 *             startLine(paraLevel, width);
241 *
242 *             renderLine(para, text, 0, length,
243 *                        styleRuns, styleRunCount);
244 *         } else {
245 *             UBiDi *line;
246 *
247 *             // we need to render several lines
248 *             line=ubidi_openSized(length, 0, pErrorCode);
249 *             if(line!=NULL) {
250 *                 int32_t start=0, limit;
251 *                 int styleRunStart=0, styleRunLimit;
252 *
253 *                 for(;;) {
254 *                     limit=length;
255 *                     styleRunLimit=styleRunCount;
256 *                     getLineBreak(text, start, &limit, para,
257 *                                  styleRuns, styleRunStart, &styleRunLimit,
258 *                                 &width);
259 *                     ubidi_setLine(para, start, limit, line, pErrorCode);
260 *                     if(U_SUCCESS(*pErrorCode)) {
261 *                         // prepare rendering a new line
262 *                         // from either left or right
263 *                         startLine(paraLevel, width);
264 *
265 *                         renderLine(line, text, start, limit,
266 *                                    styleRuns+styleRunStart,
267 *                                    styleRunLimit-styleRunStart);
268 *                     }
269 *                     if(limit==length) { break; }
270 *                     start=limit;
271 *                     styleRunStart=styleRunLimit-1;
272 *                     if(start>=styleRuns[styleRunStart].limit) {
273 *                         ++styleRunStart;
274 *                     }
275 *                 }
276 *
277 *                 ubidi_close(line);
278 *             }
279 *        }
280 *    }
281 *
282 *     ubidi_close(para);
283 *}
284 *\endcode
285 * </pre>
286 */
287
288/*DOCXX_TAG*/
289/*@{*/
290
291/**
292 * UBiDiLevel is the type of the level values in this
293 * Bidi implementation.
294 * It holds an embedding level and indicates the visual direction
295 * by its bit&nbsp;0 (even/odd value).<p>
296 *
297 * It can also hold non-level values for the
298 * <code>paraLevel</code> and <code>embeddingLevels</code>
299 * arguments of <code>ubidi_setPara()</code>; there:
300 * <ul>
301 * <li>bit&nbsp;7 of an <code>embeddingLevels[]</code>
302 * value indicates whether the using application is
303 * specifying the level of a character to <i>override</i> whatever the
304 * Bidi implementation would resolve it to.</li>
305 * <li><code>paraLevel</code> can be set to the
306 * pseudo-level values <code>UBIDI_DEFAULT_LTR</code>
307 * and <code>UBIDI_DEFAULT_RTL</code>.</li>
308 * </ul>
309 *
310 * @see ubidi_setPara
311 *
312 * <p>The related constants are not real, valid level values.
313 * <code>UBIDI_DEFAULT_XXX</code> can be used to specify
314 * a default for the paragraph level for the case
315 * when the <code>ubidi_setPara()</code> function
316 * shall determine it but there is no
317 * strong directional character in the input.<p>
318 *
319 * Note that the value for <code>UBIDI_DEFAULT_LTR</code> is even
320 * and the one for <code>UBIDI_DEFAULT_RTL</code> is odd,
321 * just like with normal LTR and RTL level values -
322 * these special values are designed that way. Also, the implementation
323 * assumes that <code>UBIDI_MAX_EXPLICIT_LEVEL</code> is odd.
324 *
325 * @see UBIDI_DEFAULT_LTR
326 * @see UBIDI_DEFAULT_RTL
327 * @see UBIDI_LEVEL_OVERRIDE
328 * @see UBIDI_MAX_EXPLICIT_LEVEL
329 * @stable ICU 2.0
330 */
331typedef uint8_t UBiDiLevel;
332
333/** Paragraph level setting.<p>
334 *
335 * Constant indicating that the base direction depends on the first strong
336 * directional character in the text according to the Unicode Bidirectional
337 * Algorithm. If no strong directional character is present,
338 * then set the paragraph level to 0 (left-to-right).<p>
339 *
340 * If this value is used in conjunction with reordering modes
341 * <code>UBIDI_REORDER_INVERSE_LIKE_DIRECT</code> or
342 * <code>UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code>, the text to reorder
343 * is assumed to be visual LTR, and the text after reordering is required
344 * to be the corresponding logical string with appropriate contextual
345 * direction. The direction of the result string will be RTL if either
346 * the rightmost or leftmost strong character of the source text is RTL
347 * or Arabic Letter; the direction will be LTR otherwise.<p>
348 *
349 * If reordering option <code>UBIDI_OPTION_INSERT_MARKS</code> is set, an RLM may
350 * be added at the beginning of the result string to ensure round trip
351 * (that the result string, when reordered back to visual, will produce
352 * the original source text).
353 * @see UBIDI_REORDER_INVERSE_LIKE_DIRECT
354 * @see UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL
355 * @stable ICU 2.0
356 */
357#define UBIDI_DEFAULT_LTR 0xfe
358
359/** Paragraph level setting.<p>
360 *
361 * Constant indicating that the base direction depends on the first strong
362 * directional character in the text according to the Unicode Bidirectional
363 * Algorithm. If no strong directional character is present,
364 * then set the paragraph level to 1 (right-to-left).<p>
365 *
366 * If this value is used in conjunction with reordering modes
367 * <code>UBIDI_REORDER_INVERSE_LIKE_DIRECT</code> or
368 * <code>UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code>, the text to reorder
369 * is assumed to be visual LTR, and the text after reordering is required
370 * to be the corresponding logical string with appropriate contextual
371 * direction. The direction of the result string will be RTL if either
372 * the rightmost or leftmost strong character of the source text is RTL
373 * or Arabic Letter, or if the text contains no strong character;
374 * the direction will be LTR otherwise.<p>
375 *
376 * If reordering option <code>UBIDI_OPTION_INSERT_MARKS</code> is set, an RLM may
377 * be added at the beginning of the result string to ensure round trip
378 * (that the result string, when reordered back to visual, will produce
379 * the original source text).
380 * @see UBIDI_REORDER_INVERSE_LIKE_DIRECT
381 * @see UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL
382 * @stable ICU 2.0
383 */
384#define UBIDI_DEFAULT_RTL 0xff
385
386/**
387 * Maximum explicit embedding level (an odd value, as the implementation assumes).
388 * (The maximum resolved level can be up to <code>UBIDI_MAX_EXPLICIT_LEVEL+1</code>.)
389 * @stable ICU 2.0
390 */
391#define UBIDI_MAX_EXPLICIT_LEVEL 61
392
393/** Bit flag (bit&nbsp;7) for an <code>embeddingLevels[]</code> value passed to <code>ubidi_setPara()</code>.
394 *  When set, the specified level overrides whatever the Bidi implementation would resolve it to.
395 * @stable ICU 2.0
396 */
397#define UBIDI_LEVEL_OVERRIDE 0x80
398
399/**
400 * Special value which can be returned by the mapping functions when a logical
401 * index has no corresponding visual index or vice versa. This may happen
402 * for the logical-to-visual mapping of a Bidi control when option
403 * <code>#UBIDI_OPTION_REMOVE_CONTROLS</code> is specified. This can also happen
404 * for the visual-to-logical mapping of a Bidi mark (LRM or RLM) inserted
405 * by option <code>#UBIDI_OPTION_INSERT_MARKS</code>.
406 * @see ubidi_getVisualIndex
407 * @see ubidi_getVisualMap
408 * @see ubidi_getLogicalIndex
409 * @see ubidi_getLogicalMap
410 * @stable ICU 3.6
411 */
412#define UBIDI_MAP_NOWHERE   (-1)
413
414/**
415 * <code>UBiDiDirection</code> values indicate the text direction.
416 * @stable ICU 2.0
417 */
418enum UBiDiDirection {
419    /** All left-to-right text. This is a 0 value. @stable ICU 2.0 */
420    UBIDI_LTR,
421    /** All right-to-left text. This is a 1 value. @stable ICU 2.0 */
422    UBIDI_RTL,
423    /** Mixed-directional text. This is a 2 value. @stable ICU 2.0 */
424    UBIDI_MIXED
425};
426
427/** Type name for <code>enum UBiDiDirection</code>. @stable ICU 2.0 */
428typedef enum UBiDiDirection UBiDiDirection;
429
430/**
431 * Forward declaration of the <code>UBiDi</code> structure for the declaration of
432 * the API functions. Its fields are implementation-specific.<p>
433 * This structure holds information about a paragraph (or multiple paragraphs)
434 * of text with Bidi-algorithm-related details, or about one line of
435 * such a paragraph.<p>
436 * Reordering can be done on a line, or on one or more paragraphs which are
437 * then interpreted each as one single line.
438 * @stable ICU 2.0
439 */
440struct UBiDi;
441
442/** Type name for <code>struct UBiDi</code>, as used in the API function declarations. @stable ICU 2.0 */
443typedef struct UBiDi UBiDi;
444
445/**
446 * Allocate a <code>UBiDi</code> structure.
447 * Such an object is initially empty. It is assigned
448 * the Bidi properties of a piece of text containing one or more paragraphs
449 * by <code>ubidi_setPara()</code>
450 * or the Bidi properties of a line within a paragraph by
451 * <code>ubidi_setLine()</code>.<p>
452 * This object can be reused for as long as it is not deallocated
453 * by calling <code>ubidi_close()</code>.<p>
454 * <code>ubidi_setPara()</code> and <code>ubidi_setLine()</code> will allocate
455 * additional memory for internal structures as necessary.
456 *
457 * @return An empty <code>UBiDi</code> object; free it with <code>ubidi_close()</code>.
458 * @stable ICU 2.0
459 */
460U_STABLE UBiDi * U_EXPORT2
461ubidi_open(void);
462
463/**
464 * Allocate a <code>UBiDi</code> structure with preallocated memory
465 * for internal structures.
466 * This function provides a <code>UBiDi</code> object like <code>ubidi_open()</code>
467 * with no arguments, but it also preallocates memory for internal structures
468 * according to the sizings supplied by the caller.<p>
469 * Subsequent functions will not allocate any more memory, and are thus
470 * guaranteed not to fail because of lack of memory.<p>
471 * The preallocation can be limited to some of the internal memory
472 * by setting some values to 0 here. That means that if, e.g.,
473 * <code>maxRunCount</code> cannot be reasonably predetermined and should not
474 * be set to <code>maxLength</code> (the only failproof value) to avoid
475 * wasting memory, then <code>maxRunCount</code> could be set to 0 here
476 * and the internal structures that are associated with it will be allocated
477 * on demand, just like with <code>ubidi_open()</code>.
478 *
479 * @param maxLength is the maximum text or line length that internal memory
480 *        will be preallocated for. An attempt to associate this object with a
481 *        longer text will fail, unless this value is 0, which leaves the allocation
482 *        up to the implementation.
483 *
484 * @param maxRunCount is the maximum anticipated number of same-level runs
485 *        that internal memory will be preallocated for. An attempt to access
486 *        visual runs on an object that was not preallocated for as many runs
487 *        as the text was actually resolved to will fail,
488 *        unless this value is 0, which leaves the allocation up to the implementation.<br><br>
489 *        The number of runs depends on the actual text and may be anywhere between
490 *        1 and <code>maxLength</code>. It is typically small.
491 *
492 * @param pErrorCode must be a valid pointer to an error code value.
493 *
494 * @return An empty <code>UBiDi</code> object with preallocated memory.
495 * @stable ICU 2.0
496 */
497U_STABLE UBiDi * U_EXPORT2
498ubidi_openSized(int32_t maxLength, int32_t maxRunCount, UErrorCode *pErrorCode);
499
500/**
501 * <code>ubidi_close()</code> must be called to free the memory
502 * associated with a UBiDi object.<p>
503 *
504 * <strong>Important: </strong>
505 * A parent <code>UBiDi</code> object must not be destroyed or reused if
506 * it still has children.
507 * If a <code>UBiDi</code> object has become the <i>child</i>
508 * of another one (its <i>parent</i>) by calling
509 * <code>ubidi_setLine()</code>, then the child object must
510 * be destroyed (closed) or reused (by calling
511 * <code>ubidi_setPara()</code> or <code>ubidi_setLine()</code>)
512 * before the parent object.
513 *
514 * @param pBiDi is the <code>UBiDi</code> object to be closed.
515 *
516 * @see ubidi_setPara
517 * @see ubidi_setLine
518 * @stable ICU 2.0
519 */
520U_STABLE void U_EXPORT2
521ubidi_close(UBiDi *pBiDi);
522
523#if U_SHOW_CPLUSPLUS_API
524
525U_NAMESPACE_BEGIN
526
527/**
528 * \class LocalUBiDiPointer
529 * "Smart pointer" class; closes a UBiDi via ubidi_close().
530 * For most methods see the LocalPointerBase base class.
531 *
532 * @see LocalPointerBase
533 * @see LocalPointer
534 * @draft ICU 4.4
535 */
536U_DEFINE_LOCAL_OPEN_POINTER(LocalUBiDiPointer, UBiDi, ubidi_close);
537
538U_NAMESPACE_END
539
540#endif  /* U_SHOW_CPLUSPLUS_API */
541
542/**
543 * Modify the operation of the Bidi algorithm such that it
544 * approximates an "inverse Bidi" algorithm. This function
545 * must be called before <code>ubidi_setPara()</code>.
546 *
547 * <p>The normal operation of the Bidi algorithm as described
548 * in the Unicode Standard Annex #9 is to take text stored in logical
549 * (keyboard, typing) order and to determine the reordering of it for visual
550 * rendering.
551 * Some legacy systems store text in visual order, and for operations
552 * with standard, Unicode-based algorithms, the text needs to be transformed
553 * to logical order. This is effectively the inverse algorithm of the
554 * described Bidi algorithm. Note that there is no standard algorithm for
555 * this "inverse Bidi" and that the current implementation provides only an
556 * approximation of "inverse Bidi".</p>
557 *
558 * <p>With <code>isInverse</code> set to <code>TRUE</code>,
559 * this function changes the behavior of some of the subsequent functions
560 * in a way that they can be used for the inverse Bidi algorithm.
561 * Specifically, runs of text with numeric characters will be treated in a
562 * special way and may need to be surrounded with LRM characters when they are
563 * written in reordered sequence.</p>
564 *
565 * <p>Output runs should be retrieved using <code>ubidi_getVisualRun()</code>.
566 * Since the actual input for "inverse Bidi" is visually ordered text and
567 * <code>ubidi_getVisualRun()</code> gets the reordered runs, these are actually
568 * the runs of the logically ordered output.</p>
569 *
570 * <p>Calling this function with argument <code>isInverse</code> set to
571 * <code>TRUE</code> is equivalent to calling
572 * <code>ubidi_setReorderingMode</code> with argument
573 * <code>reorderingMode</code>
574 * set to <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code>.<br>
575 * Calling this function with argument <code>isInverse</code> set to
576 * <code>FALSE</code> is equivalent to calling
577 * <code>ubidi_setReorderingMode</code> with argument
578 * <code>reorderingMode</code>
579 * set to <code>#UBIDI_REORDER_DEFAULT</code>.</p>
580 *
581 * @param pBiDi is a <code>UBiDi</code> object.
582 *
583 * @param isInverse specifies "forward" or "inverse" Bidi operation.
584 *
585 * @see ubidi_setPara
586 * @see ubidi_writeReordered
587 * @see ubidi_setReorderingMode
588 * @stable ICU 2.0
589 */
590U_STABLE void U_EXPORT2
591ubidi_setInverse(UBiDi *pBiDi, UBool isInverse);
592
593/**
594 * Is this Bidi object set to perform the inverse Bidi algorithm?
595 * <p>Note: calling this function after setting the reordering mode with
596 * <code>ubidi_setReorderingMode</code> will return <code>TRUE</code> if the
597 * reordering mode was set to <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code>,
598 * <code>FALSE</code> for all other values.</p>
599 *
600 * @param pBiDi is a <code>UBiDi</code> object.
601 * @return TRUE if the Bidi object is set to perform the inverse Bidi algorithm
602 * by handling numbers as L, FALSE otherwise.
603 *
604 * @see ubidi_setInverse
605 * @see ubidi_setReorderingMode
606 * @stable ICU 2.0
607 */
608
609U_STABLE UBool U_EXPORT2
610ubidi_isInverse(UBiDi *pBiDi);
611
612/**
613 * Specify whether block separators (paragraph separators, B) must be assigned level zero,
614 * so that successive paragraphs will progress from left to right.
615 * This function must be called before <code>ubidi_setPara()</code>.
616 * Paragraph separators (B) may appear in the text.  Setting them to level zero
617 * means that all paragraph separators (including one possibly appearing
618 * in the last text position) are kept in the reordered text after the text
619 * that they follow in the source text.
620 * When this feature is not enabled, a paragraph separator at the last
621 * position of the text before reordering will go to the first position
622 * of the reordered text when the paragraph level is odd.
623 *
624 * @param pBiDi is a <code>UBiDi</code> object.
625 *
626 * @param orderParagraphsLTR specifies whether paragraph separators (B) must
627 * receive level 0, so that successive paragraphs progress from left to right.
628 *
629 * @see ubidi_setPara
630 * @stable ICU 3.4
631 */
632U_STABLE void U_EXPORT2
633ubidi_orderParagraphsLTR(UBiDi *pBiDi, UBool orderParagraphsLTR);
634
635/**
636 * Is this Bidi object set to assign level 0 to block separators so that
637 * successive paragraphs progress from left to right?
638 *
639 * @param pBiDi is a <code>UBiDi</code> object.
640 * @return TRUE if the Bidi object is set to assign level 0 to block
641 *         separators.
642 *
643 * @see ubidi_orderParagraphsLTR
644 * @stable ICU 3.4
645 */
646U_STABLE UBool U_EXPORT2
647ubidi_isOrderParagraphsLTR(UBiDi *pBiDi);
648
649/**
650 * <code>UBiDiReorderingMode</code> values indicate which variant of the Bidi
651 * algorithm to use.
652 *
653 * @see ubidi_setReorderingMode
654 * @stable ICU 3.6
655 */
656typedef enum UBiDiReorderingMode {
657    /** Regular Logical to Visual Bidi algorithm according to Unicode.
658      * This is a 0 value.
659      * @stable ICU 3.6 */
660    UBIDI_REORDER_DEFAULT = 0,
661    /** Logical to Visual algorithm which handles numbers in a way which
662      * mimics the behavior of Windows XP.
663      * @stable ICU 3.6 */
664    UBIDI_REORDER_NUMBERS_SPECIAL,
665    /** Logical to Visual algorithm grouping numbers with adjacent R characters
666      * (reversible algorithm).
667      * @stable ICU 3.6 */
668    UBIDI_REORDER_GROUP_NUMBERS_WITH_R,
669    /** Reorder runs only to transform a Logical LTR string to the Logical RTL
670      * string with the same display, or vice-versa.<br>
671      * If this mode is set together with option
672      * <code>#UBIDI_OPTION_INSERT_MARKS</code>, some Bidi controls in the source
673      * text may be removed and other controls may be added to produce the
674      * minimum combination which has the required display.
675      * @stable ICU 3.6 */
676    UBIDI_REORDER_RUNS_ONLY,
677    /** Visual to Logical algorithm which handles numbers like L
678      * (same algorithm as selected by <code>ubidi_setInverse(TRUE)</code>).
679      * @see ubidi_setInverse
680      * @stable ICU 3.6 */
681    UBIDI_REORDER_INVERSE_NUMBERS_AS_L,
682    /** Visual to Logical algorithm equivalent to the regular Logical to Visual
683      * algorithm.
684      * @stable ICU 3.6 */
685    UBIDI_REORDER_INVERSE_LIKE_DIRECT,
686    /** Inverse Bidi (Visual to Logical) algorithm for the
687      * <code>UBIDI_REORDER_NUMBERS_SPECIAL</code> Bidi algorithm.
688      * @stable ICU 3.6 */
689    UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL,
690    /** Number of values for reordering mode.
691      * @stable ICU 3.6 */
692    UBIDI_REORDER_COUNT
693} UBiDiReorderingMode;
694
695/**
696 * Modify the operation of the Bidi algorithm such that it implements some
697 * variant to the basic Bidi algorithm or approximates an "inverse Bidi"
698 * algorithm, depending on different values of the "reordering mode".
699 * This function must be called before <code>ubidi_setPara()</code>, and stays
700 * in effect until called again with a different argument.
701 *
702 * <p>The normal operation of the Bidi algorithm as described
703 * in the Unicode Standard Annex #9 is to take text stored in logical
704 * (keyboard, typing) order and to determine how to reorder it for visual
705 * rendering.</p>
706 *
707 * <p>With the reordering mode set to a value other than
708 * <code>#UBIDI_REORDER_DEFAULT</code>, this function changes the behavior of
709 * some of the subsequent functions in a way such that they implement an
710 * inverse Bidi algorithm or some other algorithm variants.</p>
711 *
712 * <p>Some legacy systems store text in visual order, and for operations
713 * with standard, Unicode-based algorithms, the text needs to be transformed
714 * into logical order. This is effectively the inverse algorithm of the
715 * described Bidi algorithm. Note that there is no standard algorithm for
716 * this "inverse Bidi", so a number of variants are implemented here.</p>
717 *
718 * <p>In other cases, it may be desirable to emulate some variant of the
719 * Logical to Visual algorithm (e.g. one used in MS Windows), or perform a
720 * Logical to Logical transformation.</p>
721 *
722 * <ul>
723 * <li>When the reordering mode is set to <code>#UBIDI_REORDER_DEFAULT</code>,
724 * the standard Bidi Logical to Visual algorithm is applied.</li>
725 *
726 * <li>When the reordering mode is set to
727 * <code>#UBIDI_REORDER_NUMBERS_SPECIAL</code>,
728 * the algorithm used to perform Bidi transformations when calling
729 * <code>ubidi_setPara</code> should approximate the algorithm used in
730 * Microsoft Windows XP rather than strictly conform to the Unicode Bidi
731 * algorithm.
732 * <br>
733 * The differences between the basic algorithm and the algorithm addressed
734 * by this option are as follows:
735 * <ul>
736 *   <li>Within text at an even embedding level, the sequence "123AB"
737 *   (where AB represent R or AL letters) is transformed to "123BA" by the
738 *   Unicode algorithm and to "BA123" by the Windows algorithm.</li>
739 *   <li>Arabic-Indic numbers (AN) are handled by the Windows algorithm just
740 *   like regular numbers (EN).</li>
741 * </ul></li>
742 *
743 * <li>When the reordering mode is set to
744 * <code>#UBIDI_REORDER_GROUP_NUMBERS_WITH_R</code>,
745 * numbers located between LTR text and RTL text are associated with the RTL
746 * text. For instance, an LTR paragraph with content "abc 123 DEF" (where
747 * upper case letters represent RTL characters) will be transformed to
748 * "abc FED 123" (and not "abc 123 FED"), "DEF 123 abc" will be transformed
749 * to "123 FED abc" and "123 FED abc" will be transformed to "DEF 123 abc".
750 * This makes the algorithm reversible and makes it useful when round trip
751 * (from visual to logical and back to visual) must be achieved without
752 * adding LRM characters. However, this is a variation from the standard
753 * Unicode Bidi algorithm.<br>
754 * The source text should not contain Bidi control characters other than LRM
755 * or RLM.</li>
756 *
757 * <li>When the reordering mode is set to
758 * <code>#UBIDI_REORDER_RUNS_ONLY</code>,
759 * a "Logical to Logical" transformation must be performed:
760 * <ul>
761 * <li>If the default text level of the source text (argument <code>paraLevel</code>
762 * in <code>ubidi_setPara</code>) is even, the source text will be handled as
763 * LTR logical text and will be transformed to the RTL logical text which has
764 * the same LTR visual display.</li>
765 * <li>If the default level of the source text is odd, the source text
766 * will be handled as RTL logical text and will be transformed to the
767 * LTR logical text which has the same LTR visual display.</li>
768 * </ul>
769 * This mode may be needed when logical text which is basically Arabic or
770 * Hebrew, with possible included numbers or phrases in English, has to be
771 * displayed as if it had an even embedding level (this can happen if the
772 * displaying application treats all text as if it was basically LTR).
773 * <br>
774 * This mode may also be needed in the reverse case, when logical text which is
775 * basically English, with possible included phrases in Arabic or Hebrew, has to
776 * be displayed as if it had an odd embedding level.
777 * <br>
778 * Both cases could be handled by adding LRE or RLE at the head of the text,
779 * if the display subsystem supports these formatting controls. If it does not,
780 * the problem may be handled by transforming the source text in this mode
781 * before displaying it, so that it will be displayed properly.<br>
782 * The source text should not contain Bidi control characters other than LRM
783 * or RLM.</li>
784 *
785 * <li>When the reordering mode is set to
786 * <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code>, an "inverse Bidi" algorithm
787 * is applied.
788 * Runs of text with numeric characters will be treated like LTR letters and
789 * may need to be surrounded with LRM characters when they are written in
790 * reordered sequence (the option <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code> can
791 * be used with function <code>ubidi_writeReordered</code> to this end). This
792 * mode is equivalent to calling <code>ubidi_setInverse()</code> with
793 * argument <code>isInverse</code> set to <code>TRUE</code>.</li>
794 *
795 * <li>When the reordering mode is set to
796 * <code>#UBIDI_REORDER_INVERSE_LIKE_DIRECT</code>, the "direct" Logical to Visual
797 * Bidi algorithm is used as an approximation of an "inverse Bidi" algorithm.
798 * This mode is similar to mode <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code>
799 * but is closer to the regular Bidi algorithm.
800 * <br>
801 * For example, an LTR paragraph with the content "FED 123 456 CBA" (where
802 * upper case represents RTL characters) will be transformed to
803 * "ABC 456 123 DEF", as opposed to "DEF 123 456 ABC"
804 * with mode <code>UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code>.<br>
805 * When used in conjunction with option
806 * <code>#UBIDI_OPTION_INSERT_MARKS</code>, this mode generally
807 * adds Bidi marks to the output significantly more sparingly than mode
808 * <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code> with option
809 * <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code> in calls to
810 * <code>ubidi_writeReordered</code>.</li>
811 *
812 * <li>When the reordering mode is set to
813 * <code>#UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code>, the Logical to Visual
814 * Bidi algorithm used in Windows XP is used as an approximation of an "inverse Bidi" algorithm.
815 * <br>
816 * For example, an LTR paragraph with the content "abc FED123" (where
817 * upper case represents RTL characters) will be transformed to "abc 123DEF."</li>
818 * </ul>
819 *
820 * <p>In all the reordering modes specifying an "inverse Bidi" algorithm
821 * (i.e. those with a name starting with <code>UBIDI_REORDER_INVERSE</code>),
822 * output runs should be retrieved using
823 * <code>ubidi_getVisualRun()</code>, and the output text with
824 * <code>ubidi_writeReordered()</code>. The caller should keep in mind that in
825 * "inverse Bidi" modes the input is actually visually ordered text and the
826 * reordered output returned by <code>ubidi_getVisualRun()</code> or
827 * <code>ubidi_writeReordered()</code> is actually runs or a character string
828 * of logically ordered output.<br>
829 * For all the "inverse Bidi" modes, the source text should not contain
830 * Bidi control characters other than LRM or RLM.</p>
831 *
832 * <p>Note that option <code>#UBIDI_OUTPUT_REVERSE</code> of
833 * <code>ubidi_writeReordered</code> has no useful meaning and should not be
834 * used in conjunction with any value of the reordering mode specifying
835 * "inverse Bidi" or with value <code>UBIDI_REORDER_RUNS_ONLY</code>.</p>
836 *
837 * @param pBiDi is a <code>UBiDi</code> object.
838 * @param reorderingMode specifies the required variant of the Bidi algorithm.
839 *
840 * @see UBiDiReorderingMode
841 * @see ubidi_setInverse
842 * @see ubidi_setPara
843 * @see ubidi_writeReordered
844 * @stable ICU 3.6
845 */
846U_STABLE void U_EXPORT2
847ubidi_setReorderingMode(UBiDi *pBiDi, UBiDiReorderingMode reorderingMode);
848
849/**
850 * Get the reordering mode which is currently requested for a given Bidi object.
851 *
852 * @param pBiDi is a <code>UBiDi</code> object.
853 * @return the current reordering mode of the Bidi object (a <code>UBiDiReorderingMode</code> value)
854 * @see ubidi_setReorderingMode
855 * @stable ICU 3.6
856 */
857U_STABLE UBiDiReorderingMode U_EXPORT2
858ubidi_getReorderingMode(UBiDi *pBiDi);
859
860/**
861 * <code>UBiDiReorderingOption</code> values (0 or a combination of option bits)
862 * indicate which options are specified to affect the Bidi algorithm.
863 *
864 * @see ubidi_setReorderingOptions
865 * @stable ICU 3.6
866 */
867typedef enum UBiDiReorderingOption {
868    /**
869     * option value for <code>ubidi_setReorderingOptions</code>:
870     * disable all the options which can be set with this function
871     * @see ubidi_setReorderingOptions
872     * @stable ICU 3.6
873     */
874    UBIDI_OPTION_DEFAULT = 0,
875
876    /**
877     * option bit for <code>ubidi_setReorderingOptions</code>:
878     * insert Bidi marks (LRM or RLM) when needed to ensure the correct result of
879     * a reordering to a Logical order
880     *
881     * <p>This option must be set or reset before calling
882     * <code>ubidi_setPara</code>.</p>
883     *
884     * <p>This option is significant only with reordering modes which generate
885     * a result with Logical order, specifically:</p>
886     * <ul>
887     *   <li><code>#UBIDI_REORDER_RUNS_ONLY</code></li>
888     *   <li><code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code></li>
889     *   <li><code>#UBIDI_REORDER_INVERSE_LIKE_DIRECT</code></li>
890     *   <li><code>#UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code></li>
891     * </ul>
892     *
893     * <p>If this option is set in conjunction with reordering mode
894     * <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code> or with calling
895     * <code>ubidi_setInverse(TRUE)</code>, it implies
896     * option <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code>
897     * in calls to function <code>ubidi_writeReordered()</code>.</p>
898     *
899     * <p>For other reordering modes, a minimum number of LRM or RLM characters
900     * will be added to the source text after reordering it so as to ensure
901     * round trip, i.e. when applying the inverse reordering mode on the
902     * resulting logical text with removal of Bidi marks
903     * (option <code>#UBIDI_OPTION_REMOVE_CONTROLS</code> set before calling
904     * <code>ubidi_setPara()</code> or option <code>#UBIDI_REMOVE_BIDI_CONTROLS</code>
905     * in <code>ubidi_writeReordered</code>), the result will be identical to the
906     * source text in the first transformation.</p>
907     *
908     * <p>This option will be ignored if specified together with option
909     * <code>#UBIDI_OPTION_REMOVE_CONTROLS</code>. It inhibits option
910     * <code>#UBIDI_REMOVE_BIDI_CONTROLS</code> in calls to function
911     * <code>ubidi_writeReordered()</code> and it implies option
912     * <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code> in calls to function
913     * <code>ubidi_writeReordered()</code> if the reordering mode is
914     * <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code>.</p>
915     *
916     * @see ubidi_setReorderingMode
917     * @see ubidi_setReorderingOptions
918     * @stable ICU 3.6
919     */
920    UBIDI_OPTION_INSERT_MARKS = 1,
921
922    /**
923     * option bit for <code>ubidi_setReorderingOptions</code>:
924     * remove Bidi control characters
925     *
926     * <p>This option must be set or reset before calling
927     * <code>ubidi_setPara</code>.</p>
928     *
929     * <p>This option nullifies option <code>#UBIDI_OPTION_INSERT_MARKS</code>.
930     * It inhibits option <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code> in calls
931     * to function <code>ubidi_writeReordered()</code> and it implies option
932     * <code>#UBIDI_REMOVE_BIDI_CONTROLS</code> in calls to that function.</p>
933     *
934     * @see ubidi_setReorderingMode
935     * @see ubidi_setReorderingOptions
936     * @stable ICU 3.6
937     */
938    UBIDI_OPTION_REMOVE_CONTROLS = 2,
939
940    /**
941     * option bit for <code>ubidi_setReorderingOptions</code>:
942     * process the output as part of a stream to be continued
943     *
944     * <p>This option must be set or reset before calling
945     * <code>ubidi_setPara</code>.</p>
946     *
947     * <p>This option specifies that the caller is interested in processing a
948     * large text object in parts.
949     * The results of the successive calls are expected to be concatenated by the
950     * caller. Only the call for the last part will have this option bit off.</p>
951     *
952     * <p>When this option bit is on, <code>ubidi_setPara()</code> may process
953     * less than the full source text in order to truncate the text at a meaningful
954     * boundary. The caller should call <code>ubidi_getProcessedLength()</code>
955     * immediately after calling <code>ubidi_setPara()</code> in order to
956     * determine how much of the source text has been processed.
957     * Source text beyond that length should be resubmitted in following calls to
958     * <code>ubidi_setPara</code>. The processed length may be less than
959     * the length of the source text if a character preceding the last character of
960     * the source text constitutes a reasonable boundary (like a block separator)
961     * for text to be continued.<br>
962     * If the last character of the source text constitutes a reasonable
963     * boundary, the whole text will be processed at once.<br>
964     * If no such reasonable boundary exists anywhere in the source text,
965     * the processed length will be zero.<br>
966     * The caller should check for such an occurrence and do one of the following:
967     * <ul><li>submit a larger amount of text with a better chance to include
968     *         a reasonable boundary.</li>
969     *     <li>resubmit the same text after turning off option
970     *         <code>UBIDI_OPTION_STREAMING</code>.</li></ul>
971     * In all cases, this option should be turned off before processing the last
972     * part of the text.</p>
973     *
974     * <p>When the <code>UBIDI_OPTION_STREAMING</code> option is used,
975     * it is recommended to call <code>ubidi_orderParagraphsLTR()</code> with
976     * argument <code>orderParagraphsLTR</code> set to <code>TRUE</code> before
977     * calling <code>ubidi_setPara</code> so that later paragraphs may be
978     * concatenated to previous paragraphs on the right.</p>
979     *
980     * @see ubidi_setReorderingMode
981     * @see ubidi_setReorderingOptions
982     * @see ubidi_getProcessedLength
983     * @see ubidi_orderParagraphsLTR
984     * @stable ICU 3.6
985     */
986    UBIDI_OPTION_STREAMING = 4
987} UBiDiReorderingOption;
988
989/**
990 * Specify which of the reordering options should be applied during Bidi
991 * transformations. Options must be set or reset before calling <code>ubidi_setPara</code>.
992 *
993 * @param pBiDi is a <code>UBiDi</code> object.
994 * @param reorderingOptions is a combination (bitwise OR) of zero or more of the
995 * following option bits:
996 * <code>#UBIDI_OPTION_DEFAULT</code>, <code>#UBIDI_OPTION_INSERT_MARKS</code>,
997 * <code>#UBIDI_OPTION_REMOVE_CONTROLS</code>, <code>#UBIDI_OPTION_STREAMING</code>.
998 *
999 * @see ubidi_getReorderingOptions
1000 * @stable ICU 3.6
1001 */
1002U_STABLE void U_EXPORT2
1003ubidi_setReorderingOptions(UBiDi *pBiDi, uint32_t reorderingOptions);
1004
1005/**
1006 * Get the reordering options which are applied to a given Bidi object.
1007 *
1008 * @param pBiDi is a <code>UBiDi</code> object.
1009 * @return the current reordering options of the Bidi object (a combination of <code>UBiDiReorderingOption</code> bits)
1010 * @see ubidi_setReorderingOptions
1011 * @stable ICU 3.6
1012 */
1013U_STABLE uint32_t U_EXPORT2
1014ubidi_getReorderingOptions(UBiDi *pBiDi);
1015
1016/**
1017 * Perform the Unicode Bidi algorithm. It is defined in the
1018 * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Annex #9</a>,
1019 * version 13,
1020 * also described in The Unicode Standard, Version 4.0.<p>
1021 *
1022 * This function takes a piece of plain text containing one or more paragraphs,
1023 * with or without externally specified embedding levels from <i>styled</i>
1024 * text and computes the left-right-directionality of each character.<p>
1025 *
1026 * If the entire text is all of the same directionality, then
1027 * the function may not perform all the steps described by the algorithm,
1028 * i.e., some levels may not be the same as if all steps were performed.
1029 * This is not relevant for unidirectional text.<br>
1030 * For example, in pure LTR text with numbers the numbers would get
1031 * a resolved level of 2 higher than the surrounding text according to
1032 * the algorithm. This implementation may set all resolved levels to
1033 * the same value in such a case.<p>
1034 *
1035 * The text can be composed of multiple paragraphs. Occurrence of a block
1036 * separator in the text terminates a paragraph, and whatever comes next starts
1037 * a new paragraph. The exception to this rule is when a Carriage Return (CR)
1038 * is followed by a Line Feed (LF). Both CR and LF are block separators, but
1039 * in that case, the pair of characters is considered as terminating the
1040 * preceding paragraph, and a new paragraph will be started by a character
1041 * coming after the LF.
1042 *
1043 * @param pBiDi A <code>UBiDi</code> object allocated with <code>ubidi_open()</code>
1044 *        which will be set to contain the reordering information,
1045 *        especially the resolved levels for all the characters in <code>text</code>.
1046 *
1047 * @param text is a pointer to the text that the Bidi algorithm will be performed on.
1048 *        This pointer is stored in the UBiDi object and can be retrieved
1049 *        with <code>ubidi_getText()</code>.<br>
1050 *        <strong>Note:</strong> the text must be (at least) <code>length</code> long.
1051 *
1052 * @param length is the length of the text; if <code>length==-1</code> then
1053 *        the text must be zero-terminated.
1054 *
1055 * @param paraLevel specifies the default level for the text;
1056 *        it is typically 0 (LTR) or 1 (RTL).
1057 *        If the function shall determine the paragraph level from the text,
1058 *        then <code>paraLevel</code> can be set to
1059 *        either <code>#UBIDI_DEFAULT_LTR</code>
1060 *        or <code>#UBIDI_DEFAULT_RTL</code>; if the text contains multiple
1061 *        paragraphs, the paragraph level shall be determined separately for
1062 *        each paragraph; if a paragraph does not include any strongly typed
1063 *        character, then the desired default is used (0 for LTR or 1 for RTL).
1064 *        Any other value between 0 and <code>#UBIDI_MAX_EXPLICIT_LEVEL</code>
1065 *        is also valid, with odd levels indicating RTL.
1066 *
1067 * @param embeddingLevels (in) may be used to preset the embedding and override levels,
1068 *        ignoring characters like LRE and PDF in the text.
1069 *        A level overrides the directional property of its corresponding
1070 *        (same index) character if the level has the
1071 *        <code>#UBIDI_LEVEL_OVERRIDE</code> bit set.<br><br>
1072 *        Except for that bit, it must be
1073 *        <code>paraLevel<=embeddingLevels[]<=UBIDI_MAX_EXPLICIT_LEVEL</code>,
1074 *        with one exception: a level of zero may be specified for a paragraph
1075 *        separator even if <code>paraLevel>0</code> when multiple paragraphs
1076 *        are submitted in the same call to <code>ubidi_setPara()</code>.<br><br>
1077 *        <strong>Caution: </strong>A copy of this pointer, not of the levels,
1078 *        will be stored in the <code>UBiDi</code> object;
1079 *        the <code>embeddingLevels</code> array must not be
1080 *        deallocated before the <code>UBiDi</code> structure is destroyed or reused,
1081 *        and the <code>embeddingLevels</code>
1082 *        should not be modified to avoid unexpected results on subsequent Bidi operations.
1083 *        However, the <code>ubidi_setPara()</code> and
1084 *        <code>ubidi_setLine()</code> functions may modify some or all of the levels.<br><br>
1085 *        After the <code>UBiDi</code> object is reused or destroyed, the caller
1086 *        must take care of the deallocation of the <code>embeddingLevels</code> array.<br><br>
1087 *        <strong>Note:</strong> the <code>embeddingLevels</code> array must be
1088 *        at least <code>length</code> long.
1089 *        This pointer can be <code>NULL</code> if this
1090 *        value is not necessary.
1091 *
1092 * @param pErrorCode must be a valid pointer to an error code value.
1093 * @stable ICU 2.0
1094 */
1095U_STABLE void U_EXPORT2
1096ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length,
1097              UBiDiLevel paraLevel, UBiDiLevel *embeddingLevels,
1098              UErrorCode *pErrorCode);
1099
1100/**
1101 * <code>ubidi_setLine()</code> sets a <code>UBiDi</code> to
1102 * contain the reordering information, especially the resolved levels,
1103 * for all the characters in a line of text. This line of text is
1104 * specified by referring to a <code>UBiDi</code> object representing
1105 * this information for a piece of text containing one or more paragraphs,
1106 * and by specifying a range of indexes in this text.<p>
1107 * In the new line object, the indexes will range from 0 to <code>limit-start-1</code>.<p>
1108 *
1109 * This is used after calling <code>ubidi_setPara()</code>
1110 * for a piece of text, and after line-breaking on that text.
1111 * It is not necessary if each paragraph is treated as a single line.<p>
1112 *
1113 * After line-breaking, rules (L1) and (L2) for the treatment of
1114 * trailing WS and for reordering are performed on
1115 * a <code>UBiDi</code> object that represents a line.<p>
1116 *
1117 * <strong>Important: </strong><code>pLineBiDi</code> shares data with
1118 * <code>pParaBiDi</code>.
1119 * You must destroy or reuse <code>pLineBiDi</code> before <code>pParaBiDi</code>.
1120 * In other words, you must destroy or reuse the <code>UBiDi</code> object for a line
1121 * before the object for its parent paragraph.<p>
1122 *
1123 * The text pointer that was stored in <code>pParaBiDi</code> is also copied,
1124 * and <code>start</code> is added to it so that it points to the beginning of the
1125 * line for this object.
1126 *
1127 * @param pParaBiDi is the parent paragraph object. It must have been set
1128 * by a successful call to <code>ubidi_setPara</code>.
1129 *
1130 * @param start is the line's first index into the text.
1131 *
1132 * @param limit is just after the line's last index into the text
1133 *        (its last index +1).<br>
1134 *        It must be <code>0<=start<limit<=</code>containing paragraph limit.
1135 *        If the specified line crosses a paragraph boundary, the function
1136 *        will terminate with error code <code>U_ILLEGAL_ARGUMENT_ERROR</code>.
1137 *
1138 * @param pLineBiDi is the object that will now represent a line of the text.
1139 *
1140 * @param pErrorCode must be a valid pointer to an error code value.
1141 *
1142 * @see ubidi_setPara
1143 * @see ubidi_getProcessedLength
1144 * @stable ICU 2.0
1145 */
1146U_STABLE void U_EXPORT2
1147ubidi_setLine(const UBiDi *pParaBiDi,
1148              int32_t start, int32_t limit,
1149              UBiDi *pLineBiDi,
1150              UErrorCode *pErrorCode);
1151
1152/**
1153 * Get the directionality of the text.
1154 *
1155 * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
1156 *
1157 * @return a value of <code>UBIDI_LTR</code>, <code>UBIDI_RTL</code>
1158 *         or <code>UBIDI_MIXED</code>
1159 *         that indicates whether the entire text
1160 *         represented by this object is unidirectional,
1161 *         and in which direction, or whether it is mixed-directional.
1162 *
1163 * @see UBiDiDirection
1164 * @stable ICU 2.0
1165 */
1166U_STABLE UBiDiDirection U_EXPORT2
1167ubidi_getDirection(const UBiDi *pBiDi);
1168
1169/**
1170 * Get the pointer to the text.
1171 *
1172 * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
1173 *
1174 * @return The pointer to the text that the UBiDi object was created for (for a line object, the beginning of the line).
1175 *
1176 * @see ubidi_setPara
1177 * @see ubidi_setLine
1178 * @stable ICU 2.0
1179 */
1180U_STABLE const UChar * U_EXPORT2
1181ubidi_getText(const UBiDi *pBiDi);
1182
1183/**
1184 * Get the length of the text.
1185 *
1186 * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
1187 *
1188 * @return The length, in <code>UChar</code>s, of the text that the UBiDi object was created for.
1189 * @stable ICU 2.0
1190 */
1191U_STABLE int32_t U_EXPORT2
1192ubidi_getLength(const UBiDi *pBiDi);
1193
1194/**
1195 * Get the paragraph level of the text.
1196 *
1197 * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
1198 *
1199 * @return The paragraph level. If there are multiple paragraphs, their
1200 *         level may vary if the required paraLevel is <code>#UBIDI_DEFAULT_LTR</code>
1201 *         or <code>#UBIDI_DEFAULT_RTL</code>.  In that case, the level of the first
1202 *         paragraph is returned.
1203 *
1204 * @see UBiDiLevel
1205 * @see ubidi_getParagraph
1206 * @see ubidi_getParagraphByIndex
1207 * @stable ICU 2.0
1208 */
1209U_STABLE UBiDiLevel U_EXPORT2
1210ubidi_getParaLevel(const UBiDi *pBiDi);
1211
1212/**
1213 * Get the number of paragraphs.
1214 *
1215 * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
1216 *
1217 * @return The number of paragraphs (see <code>ubidi_setPara</code> for how paragraphs are delimited).
1218 * @stable ICU 3.4
1219 */
1220U_STABLE int32_t U_EXPORT2
1221ubidi_countParagraphs(UBiDi *pBiDi);
1222
1223/**
1224 * Get a paragraph, given a position within the text.
1225 * This function returns information about a paragraph.<br>
1226 * Note: if the paragraph index is known, it is more efficient to
1227 * retrieve the paragraph information using <code>ubidi_getParagraphByIndex()</code>.<p>
1228 *
1229 * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
1230 *
1231 * @param charIndex is the index of a character within the text, in the
1232 *        range <code>[0..ubidi_getProcessedLength(pBiDi)-1]</code>.
1233 *
1234 * @param pParaStart will receive the index of the first character of the
1235 *        paragraph in the text.
1236 *        This pointer can be <code>NULL</code> if this
1237 *        value is not necessary.
1238 *
1239 * @param pParaLimit will receive the limit of the paragraph.
1240 *        The l-value that you point to here may be the
1241 *        same expression (variable) as the one for
1242 *        <code>charIndex</code>.
1243 *        This pointer can be <code>NULL</code> if this
1244 *        value is not necessary.
1245 *
1246 * @param pParaLevel will receive the level of the paragraph.
1247 *        This pointer can be <code>NULL</code> if this
1248 *        value is not necessary.
1249 *
1250 * @param pErrorCode must be a valid pointer to an error code value.
1251 *
1252 * @return The index of the paragraph containing the specified position (usable with <code>ubidi_getParagraphByIndex()</code>).
1253 *
1254 * @see ubidi_getProcessedLength
1255 * @stable ICU 3.4
1256 */
1257U_STABLE int32_t U_EXPORT2
1258ubidi_getParagraph(const UBiDi *pBiDi, int32_t charIndex, int32_t *pParaStart,
1259                   int32_t *pParaLimit, UBiDiLevel *pParaLevel,
1260                   UErrorCode *pErrorCode);
1261
1262/**
1263 * Get a paragraph, given the index of this paragraph.
1264 *
1265 * This function returns information about a paragraph.<p>
1266 *
1267 * @param pBiDi is the paragraph <code>UBiDi</code> object.
1268 *
1269 * @param paraIndex is the zero-based index of the paragraph, in the
1270 *        range <code>[0..ubidi_countParagraphs(pBiDi)-1]</code>.
1271 *
1272 * @param pParaStart will receive the index of the first character of the
1273 *        paragraph in the text.
1274 *        This pointer can be <code>NULL</code> if this
1275 *        value is not necessary.
1276 *
1277 * @param pParaLimit will receive the limit of the paragraph.
1278 *        This pointer can be <code>NULL</code> if this
1279 *        value is not necessary.
1280 *
1281 * @param pParaLevel will receive the level of the paragraph.
1282 *        This pointer can be <code>NULL</code> if this
1283 *        value is not necessary.
1284 *
1285 * @param pErrorCode must be a valid pointer to an error code value.
1286 *
1287 * @stable ICU 3.4
1288 */
1289U_STABLE void U_EXPORT2
1290ubidi_getParagraphByIndex(const UBiDi *pBiDi, int32_t paraIndex,
1291                          int32_t *pParaStart, int32_t *pParaLimit,
1292                          UBiDiLevel *pParaLevel, UErrorCode *pErrorCode);
1293
1294/**
1295 * Get the level for one character.
1296 *
1297 * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
1298 *
1299 * @param charIndex is the index of a character in the text. It must be in the
1300 *         range <code>[0..ubidi_getProcessedLength(pBiDi)]</code>.
1301 *
1302 * @return The level for the character at <code>charIndex</code> (0 if
1303 *         <code>charIndex</code> is not in the valid range).
1304 *
1305 * @see UBiDiLevel
1306 * @see ubidi_getProcessedLength
1307 * @stable ICU 2.0
1308 */
1309U_STABLE UBiDiLevel U_EXPORT2
1310ubidi_getLevelAt(const UBiDi *pBiDi, int32_t charIndex);
1311
1312/**
1313 * Get an array with the level of each character.<p>
1314 *
1315 * Note that this function may allocate memory under some
1316 * circumstances, unlike <code>ubidi_getLevelAt()</code>.
1317 *
1318 * @param pBiDi is the paragraph or line <code>UBiDi</code> object, whose
1319 *        text length must be strictly positive.
1320 *
1321 * @param pErrorCode must be a valid pointer to an error code value.
1322 *
1323 * @return The levels array for the text,
1324 *         or <code>NULL</code> if an error occurs.
1325 *
1326 * @see UBiDiLevel
1327 * @see ubidi_getProcessedLength
1328 * @stable ICU 2.0
1329 */
1330U_STABLE const UBiDiLevel * U_EXPORT2
1331ubidi_getLevels(UBiDi *pBiDi, UErrorCode *pErrorCode);
1332
1333/**
1334 * Get a logical run.
1335 * This function returns information about a run (a sequence of characters at
1336 * the same embedding level) and is used to retrieve runs in logical order.<p>
1337 * This is especially useful for line-breaking on a paragraph.
1338 *
1339 * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
1340 *
1341 * @param logicalPosition is a logical position within the source text.
1342 *
1343 * @param pLogicalLimit will receive the limit of the corresponding run.
1344 *        The l-value that you point to here may be the
1345 *        same expression (variable) as the one for
1346 *        <code>logicalPosition</code>.
1347 *        This pointer can be <code>NULL</code> if this
1348 *        value is not necessary.
1349 *
1350 * @param pLevel will receive the level of the corresponding run.
1351 *        This pointer can be <code>NULL</code> if this
1352 *        value is not necessary.
1353 *
1354 * @see ubidi_getProcessedLength
1355 * @stable ICU 2.0
1356 */
1357U_STABLE void U_EXPORT2
1358ubidi_getLogicalRun(const UBiDi *pBiDi, int32_t logicalPosition,
1359                    int32_t *pLogicalLimit, UBiDiLevel *pLevel);
1360
1361/**
1362 * Get the number of runs.
1363 * This function may invoke the actual reordering on the
1364 * <code>UBiDi</code> object, after <code>ubidi_setPara()</code>
1365 * may have resolved only the levels of the text. Therefore,
1366 * <code>ubidi_countRuns()</code> may have to allocate memory,
1367 * and may fail in doing so.
1368 *
1369 * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
1370 *
1371 * @param pErrorCode must be a valid pointer to an error code value.
1372 *
1373 * @return The number of runs (the limit for the <code>runIndex</code> argument of <code>ubidi_getVisualRun()</code>).
1374 * @stable ICU 2.0
1375 */
1376U_STABLE int32_t U_EXPORT2
1377ubidi_countRuns(UBiDi *pBiDi, UErrorCode *pErrorCode);
1378
1379/**
1380 * Get one run's logical start, length, and directionality,
1381 * which can be 0 for LTR or 1 for RTL.
1382 * In an RTL run, the character at the logical start is
1383 * visually on the right of the displayed run.
1384 * The length is the number of characters in the run.<p>
1385 * <code>ubidi_countRuns()</code> should be called
1386 * before the runs are retrieved.
1387 *
1388 * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
1389 *
1390 * @param runIndex is the number of the run in visual order, in the
1391 *        range <code>[0..ubidi_countRuns(pBiDi)-1]</code>.
1392 *
1393 * @param pLogicalStart is the first logical character index in the text.
1394 *        The pointer may be <code>NULL</code> if this index is not needed.
1395 *
1396 * @param pLength is the number of characters (at least one) in the run.
1397 *        The pointer may be <code>NULL</code> if this is not needed.
1398 *
1399 * @return the directionality of the run,
1400 *         <code>UBIDI_LTR==0</code> or <code>UBIDI_RTL==1</code>,
1401 *         never <code>UBIDI_MIXED</code>.
1402 *
1403 * @see ubidi_countRuns
1404 *
1405 * Example:
1406 * <pre>
1407 * \code
1408 * int32_t i, count=ubidi_countRuns(pBiDi, pErrorCode),
1409 *         logicalStart, visualIndex=0, length;
1410 * for(i=0; i<count; ++i) {
1411 *     if(UBIDI_LTR==ubidi_getVisualRun(pBiDi, i, &logicalStart, &length)) {
1412 *         do { // LTR
1413 *             show_char(text[logicalStart++], visualIndex++);
1414 *         } while(--length>0);
1415 *     } else {
1416 *         logicalStart+=length;  // logicalLimit
1417 *         do { // RTL
1418 *             show_char(text[--logicalStart], visualIndex++);
1419 *         } while(--length>0);
1420 *     }
1421 * }
1422 *\endcode
1423 * </pre>
1424 *
1425 * Note that in right-to-left runs, code like this places
1426 * second surrogates before first ones (which is generally a bad idea)
1427 * and combining characters before base characters.
1428 * <p>
1429 * Use of <code>ubidi_writeReordered()</code>, optionally with the
1430 * <code>#UBIDI_KEEP_BASE_COMBINING</code> option, can be considered in order
1431 * to avoid these issues.
1432 * @stable ICU 2.0
1433 */
1434U_STABLE UBiDiDirection U_EXPORT2
1435ubidi_getVisualRun(UBiDi *pBiDi, int32_t runIndex,
1436                   int32_t *pLogicalStart, int32_t *pLength);
1437
1438/**
1439 * Get the visual position from a logical text position.
1440 * If such a mapping is used many times on the same
1441 * <code>UBiDi</code> object, then calling
1442 * <code>ubidi_getLogicalMap()</code> is more efficient.<p>
1443 *
1444 * The value returned may be <code>#UBIDI_MAP_NOWHERE</code> if there is no
1445 * visual position because the corresponding text character is a Bidi control
1446 * removed from output by the option <code>#UBIDI_OPTION_REMOVE_CONTROLS</code>.
1447 * <p>
1448 * When the visual output is altered by using options of
1449 * <code>ubidi_writeReordered()</code> such as <code>UBIDI_INSERT_LRM_FOR_NUMERIC</code>,
1450 * <code>UBIDI_KEEP_BASE_COMBINING</code>, <code>UBIDI_OUTPUT_REVERSE</code>,
1451 * <code>UBIDI_REMOVE_BIDI_CONTROLS</code>, the visual position returned may not
1452 * be correct. It is advised to use, when possible, reordering options
1453 * such as <code>UBIDI_OPTION_INSERT_MARKS</code> and <code>UBIDI_OPTION_REMOVE_CONTROLS</code>.
1454 * <p>
1455 * Note that in right-to-left runs, this mapping places
1456 * second surrogates before first ones (which is generally a bad idea)
1457 * and combining characters before base characters.
1458 * Use of <code>ubidi_writeReordered()</code>, optionally with the
1459 * <code>#UBIDI_KEEP_BASE_COMBINING</code> option, can be considered instead
1460 * of using the mapping, in order to avoid these issues.
1461 *
1462 * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
1463 *
1464 * @param logicalIndex is the index of a character in the text.
1465 *
1466 * @param pErrorCode must be a valid pointer to an error code value.
1467 *
1468 * @return The visual position of this character, or <code>#UBIDI_MAP_NOWHERE</code> as described above.
1469 *
1470 * @see ubidi_getLogicalMap
1471 * @see ubidi_getLogicalIndex
1472 * @see ubidi_getProcessedLength
1473 * @stable ICU 2.0
1474 */
1475U_STABLE int32_t U_EXPORT2
1476ubidi_getVisualIndex(UBiDi *pBiDi, int32_t logicalIndex, UErrorCode *pErrorCode);
1477
1478/**
1479 * Get the logical text position from a visual position.
1480 * If such a mapping is used many times on the same
1481 * <code>UBiDi</code> object, then calling
1482 * <code>ubidi_getVisualMap()</code> is more efficient.<p>
1483 *
1484 * The value returned may be <code>#UBIDI_MAP_NOWHERE</code> if there is no
1485 * logical position because the corresponding text character is a Bidi mark
1486 * inserted in the output by option <code>#UBIDI_OPTION_INSERT_MARKS</code>.
1487 * <p>
1488 * This is the inverse function to <code>ubidi_getVisualIndex()</code>.
1489 * <p>
1490 * When the visual output is altered by using options of
1491 * <code>ubidi_writeReordered()</code> such as <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code>,
1492 * <code>#UBIDI_KEEP_BASE_COMBINING</code>, <code>#UBIDI_OUTPUT_REVERSE</code>,
1493 * <code>#UBIDI_REMOVE_BIDI_CONTROLS</code>, the logical position returned may not
1494 * be correct. It is advised to use, when possible, reordering options
1495 * such as <code>#UBIDI_OPTION_INSERT_MARKS</code> and <code>#UBIDI_OPTION_REMOVE_CONTROLS</code>.
1496 *
1497 * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
1498 *
1499 * @param visualIndex is the visual position of a character.
1500 *        See <code>ubidi_getResultLength()</code> for its maximum value.
1501 * @param pErrorCode must be a valid pointer to an error code value.
1502 *
1503 * @return The index of this character in the text, or
1504 *         <code>#UBIDI_MAP_NOWHERE</code> if it has no logical position.
1505 * @see ubidi_getVisualMap
1506 * @see ubidi_getVisualIndex
1507 * @see ubidi_getResultLength
1508 * @stable ICU 2.0
1509 */
1510U_STABLE int32_t U_EXPORT2
1511ubidi_getLogicalIndex(UBiDi *pBiDi, int32_t visualIndex, UErrorCode *pErrorCode);
1512
1513/**
1514 * Get a logical-to-visual index map (array) for the characters in the UBiDi
1515 * (paragraph or line) object.
1516 * <p>
1517 * Some values in the map may be <code>#UBIDI_MAP_NOWHERE</code> if the
1518 * corresponding text characters are Bidi controls removed from the visual
1519 * output by the option <code>#UBIDI_OPTION_REMOVE_CONTROLS</code>.
1520 * <p>
1521 * When the visual output is altered by using options of
1522 * <code>ubidi_writeReordered()</code> such as <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code>,
1523 * <code>#UBIDI_KEEP_BASE_COMBINING</code>, <code>#UBIDI_OUTPUT_REVERSE</code>,
1524 * <code>#UBIDI_REMOVE_BIDI_CONTROLS</code>, the visual positions returned may not
1525 * be correct. It is advised to use, when possible, reordering options
1526 * such as <code>#UBIDI_OPTION_INSERT_MARKS</code> and <code>#UBIDI_OPTION_REMOVE_CONTROLS</code>.
1527 * <p>
1528 * Note that in right-to-left runs, this mapping places
1529 * second surrogates before first ones (which is generally a bad idea)
1530 * and combining characters before base characters.
1531 * Use of <code>ubidi_writeReordered()</code>, optionally with the
1532 * <code>#UBIDI_KEEP_BASE_COMBINING</code> option, can be considered instead
1533 * of using the mapping, in order to avoid these issues.
1534 *
1535 * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
1536 *
1537 * @param indexMap is a pointer to an array of <code>ubidi_getProcessedLength()</code>
1538 *        indexes which will reflect the reordering of the characters.
1539 *        If option <code>#UBIDI_OPTION_INSERT_MARKS</code> is set, the number
1540 *        of elements allocated in <code>indexMap</code> must be no less than
1541 *        <code>ubidi_getResultLength()</code>.
1542 *        The array does not need to be initialized.<br><br>
1543 *        The index map will result in <code>indexMap[logicalIndex]==visualIndex</code>.
1544 *
1545 * @param pErrorCode must be a valid pointer to an error code value.
1546 *
1547 * @see ubidi_getVisualMap
1548 * @see ubidi_getVisualIndex
1549 * @see ubidi_getProcessedLength
1550 * @see ubidi_getResultLength
1551 * @stable ICU 2.0
1552 */
1553U_STABLE void U_EXPORT2
1554ubidi_getLogicalMap(UBiDi *pBiDi, int32_t *indexMap, UErrorCode *pErrorCode);
1555
1556/**
1557 * Get a visual-to-logical index map (array) for the characters in the UBiDi
1558 * (paragraph or line) object.
1559 * <p>
1560 * Some values in the map may be <code>#UBIDI_MAP_NOWHERE</code> if the
1561 * corresponding text characters are Bidi marks inserted in the visual output
1562 * by the option <code>#UBIDI_OPTION_INSERT_MARKS</code>.
1563 * <p>
1564 * When the visual output is altered by using options of
1565 * <code>ubidi_writeReordered()</code> such as <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code>,
1566 * <code>#UBIDI_KEEP_BASE_COMBINING</code>, <code>#UBIDI_OUTPUT_REVERSE</code>,
1567 * <code>#UBIDI_REMOVE_BIDI_CONTROLS</code>, the logical positions returned may not
1568 * be correct. It is advised to use, when possible, reordering options
1569 * such as <code>#UBIDI_OPTION_INSERT_MARKS</code> and <code>#UBIDI_OPTION_REMOVE_CONTROLS</code>.
1570 *
1571 * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
1572 *
1573 * @param indexMap is a pointer to an array of <code>ubidi_getResultLength()</code>
1574 *        indexes which will reflect the reordering of the characters.
1575 *        If option <code>#UBIDI_OPTION_REMOVE_CONTROLS</code> is set, the number
1576 *        of elements allocated in <code>indexMap</code> must be no less than
1577 *        <code>ubidi_getProcessedLength()</code>.
1578 *        The array does not need to be initialized.<br><br>
1579 *        The index map will result in <code>indexMap[visualIndex]==logicalIndex</code>.
1580 *
1581 * @param pErrorCode must be a valid pointer to an error code value.
1582 *
1583 * @see ubidi_getLogicalMap
1584 * @see ubidi_getLogicalIndex
1585 * @see ubidi_getProcessedLength
1586 * @see ubidi_getResultLength
1587 * @stable ICU 2.0
1588 */
1589U_STABLE void U_EXPORT2
1590ubidi_getVisualMap(UBiDi *pBiDi, int32_t *indexMap, UErrorCode *pErrorCode);
1591
1592/**
1593 * This is a convenience function that does not use a UBiDi object.
1594 * It is intended to be used when an application has determined the levels
1595 * of objects (character sequences) and just needs to have them reordered
1596 * (rule L2 of the Bidi algorithm). This is equivalent to using
1597 * <code>ubidi_getLogicalMap()</code> on a <code>UBiDi</code> object.
1598 *
1599 * @param levels is an array with <code>length</code> levels that have been determined by
1600 *        the application.
1601 *
1602 * @param length is the number of levels in the array, or, semantically,
1603 *        the number of objects to be reordered.
1604 *        It must be <code>length>0</code>.
1605 *
1606 * @param indexMap is a pointer to an array of <code>length</code>
1607 *        indexes which will reflect the reordering of the characters.
1608 *        The array does not need to be initialized.<p>
1609 *        The index map will result in <code>indexMap[logicalIndex]==visualIndex</code>.
1610 * @stable ICU 2.0
1611 */
1612U_STABLE void U_EXPORT2
1613ubidi_reorderLogical(const UBiDiLevel *levels, int32_t length, int32_t *indexMap);
1614
1615/**
1616 * This is a convenience function that does not use a UBiDi object.
1617 * It is intended to be used when an application has determined the levels
1618 * of objects (character sequences) and just needs to have them reordered
1619 * (rule L2 of the Bidi algorithm). This is equivalent to using
1620 * <code>ubidi_getVisualMap()</code> on a <code>UBiDi</code> object.
1621 *
1622 * @param levels is an array with <code>length</code> levels that have been determined by
1623 *        the application.
1624 *
1625 * @param length is the number of levels in the array, or, semantically,
1626 *        the number of objects to be reordered.
1627 *        It must be <code>length>0</code>.
1628 *
1629 * @param indexMap is a pointer to an array of <code>length</code>
1630 *        indexes which will reflect the reordering of the characters.
1631 *        The array does not need to be initialized.<p>
1632 *        The index map will result in <code>indexMap[visualIndex]==logicalIndex</code>.
1633 * @stable ICU 2.0
1634 */
1635U_STABLE void U_EXPORT2
1636ubidi_reorderVisual(const UBiDiLevel *levels, int32_t length, int32_t *indexMap);
1637
1638/**
1639 * Invert an index map.
1640 * The index mapping of the first map is inverted and written to
1641 * the second one.
1642 *
1643 * @param srcMap is an array with <code>length</code> elements
1644 *        which defines the original mapping from a source array containing
1645 *        <code>length</code> elements to a destination array.
1646 *        Some elements of the source array may have no mapping in the
1647 *        destination array. In that case, their value will be
1648 *        the special value <code>UBIDI_MAP_NOWHERE</code>.
1649 *        All elements must be >=0 or equal to <code>UBIDI_MAP_NOWHERE</code>.
1650 *        Some elements may have a value >= <code>length</code>, if the
1651 *        destination array has more elements than the source array.
1652 *        There must be no duplicate indexes (two or more elements with the
1653 *        same value except <code>UBIDI_MAP_NOWHERE</code>).
1654 *
1655 * @param destMap is an array with a number of elements equal to 1 + the highest
1656 *        value in <code>srcMap</code>.
1657 *        <code>destMap</code> will be filled with the inverse mapping.
1658 *        If element with index i in <code>srcMap</code> has a value k different
1659 *        from <code>UBIDI_MAP_NOWHERE</code>, this means that element i of
1660 *        the source array maps to element k in the destination array.
1661 *        The inverse map will have value i in its k-th element.
1662 *        For all elements of the destination array which do not map to
1663 *        an element in the source array, the corresponding element in the
1664 *        inverse map will have a value equal to <code>UBIDI_MAP_NOWHERE</code>.
1665 *
1666 * @param length is the number of elements in <code>srcMap</code>.
1667 * @see UBIDI_MAP_NOWHERE
1668 * @stable ICU 2.0
1669 */
1670U_STABLE void U_EXPORT2
1671ubidi_invertMap(const int32_t *srcMap, int32_t *destMap, int32_t length);
1672
1673/** option flags for ubidi_writeReordered() */
1674
1675/**
1676 * option bit for ubidi_writeReordered():
1677 * keep combining characters after their base characters in RTL runs
1678 *
1679 * @see ubidi_writeReordered
1680 * @stable ICU 2.0
1681 */
1682#define UBIDI_KEEP_BASE_COMBINING       1
1683
1684/**
1685 * option bit for ubidi_writeReordered():
1686 * replace characters with the "mirrored" property in RTL runs
1687 * by their mirror-image mappings
1688 *
1689 * @see ubidi_writeReordered
1690 * @stable ICU 2.0
1691 */
1692#define UBIDI_DO_MIRRORING              2
1693
1694/**
1695 * option bit for ubidi_writeReordered():
1696 * surround the run with LRMs if necessary;
1697 * this is part of the approximate "inverse Bidi" algorithm
1698 *
1699 * <p>This option does not imply corresponding adjustment of the index
1700 * mappings.</p>
1701 *
1702 * @see ubidi_setInverse
1703 * @see ubidi_writeReordered
1704 * @stable ICU 2.0
1705 */
1706#define UBIDI_INSERT_LRM_FOR_NUMERIC    4
1707
1708/**
1709 * option bit for ubidi_writeReordered():
1710 * remove Bidi control characters
1711 * (this does not affect #UBIDI_INSERT_LRM_FOR_NUMERIC)
1712 *
1713 * <p>This option does not imply corresponding adjustment of the index
1714 * mappings.</p>
1715 *
1716 * @see ubidi_writeReordered
1717 * @stable ICU 2.0
1718 */
1719#define UBIDI_REMOVE_BIDI_CONTROLS      8
1720
1721/**
1722 * option bit for ubidi_writeReordered():
1723 * write the output in reverse order
1724 *
1725 * <p>This has the same effect as calling <code>ubidi_writeReordered()</code>
1726 * first without this option, and then calling
1727 * <code>ubidi_writeReverse()</code> without mirroring.
1728 * Doing this in the same step is faster and avoids a temporary buffer.
1729 * An example of using this option is output to a character terminal that
1730 * is designed for RTL scripts and stores text in reverse order.</p>
1731 *
1732 * @see ubidi_writeReordered
1733 * @stable ICU 2.0
1734 */
1735#define UBIDI_OUTPUT_REVERSE            16
1736
1737/**
1738 * Get the length of the source text processed by the last call to
1739 * <code>ubidi_setPara()</code>. This length may be different from the length
1740 * of the source text if option <code>#UBIDI_OPTION_STREAMING</code>
1741 * has been set.
1742 * <br>
1743 * Note that whenever the length of the text affects the execution or the
1744 * result of a function, it is the processed length which must be considered,
1745 * except for <code>ubidi_setPara()</code> (which receives unprocessed source
1746 * text) and <code>ubidi_getLength()</code> (which returns the original length
1747 * of the source text).<br>
1748 * In particular, the processed length is the one to consider in the following
1749 * cases:
1750 * <ul>
1751 * <li>maximum value of the <code>limit</code> argument of
1752 * <code>ubidi_setLine()</code></li>
1753 * <li>maximum value of the <code>charIndex</code> argument of
1754 * <code>ubidi_getParagraph()</code></li>
1755 * <li>maximum value of the <code>charIndex</code> argument of
1756 * <code>ubidi_getLevelAt()</code></li>
1757 * <li>number of elements in the array returned by <code>ubidi_getLevels()</code></li>
1758 * <li>maximum value of the <code>logicalStart</code> argument of
1759 * <code>ubidi_getLogicalRun()</code></li>
1760 * <li>maximum value of the <code>logicalIndex</code> argument of
1761 * <code>ubidi_getVisualIndex()</code></li>
1762 * <li>number of elements filled in the <code>*indexMap</code> argument of
1763 * <code>ubidi_getLogicalMap()</code></li>
1764 * <li>length of text processed by <code>ubidi_writeReordered()</code></li>
1765 * </ul>
1766 *
1767 * @param pBiDi is the paragraph <code>UBiDi</code> object.
1768 *
1769 * @return The length of the part of the source text processed by
1770 *         the last call to <code>ubidi_setPara()</code>.
1771 * @see ubidi_setPara
1772 * @see UBIDI_OPTION_STREAMING
1773 * @stable ICU 3.6
1774 */
1775U_STABLE int32_t U_EXPORT2
1776ubidi_getProcessedLength(const UBiDi *pBiDi);
1777
1778/**
1779 * Get the length of the reordered text resulting from the last call to
1780 * <code>ubidi_setPara()</code>. This length may be different from the length
1781 * of the source text if option <code>#UBIDI_OPTION_INSERT_MARKS</code>
1782 * or option <code>#UBIDI_OPTION_REMOVE_CONTROLS</code> has been set.
1783 * <br>
1784 * This resulting length is the one to consider in the following cases:
1785 * <ul>
1786 * <li>maximum value of the <code>visualIndex</code> argument of
1787 * <code>ubidi_getLogicalIndex()</code></li>
1788 * <li>number of elements of the <code>*indexMap</code> argument of
1789 * <code>ubidi_getVisualMap()</code></li>
1790 * </ul>
1791 * Note that this length stays identical to the source text length if
1792 * Bidi marks are inserted or removed using option bits of
1793 * <code>ubidi_writeReordered()</code>, or if option
1794 * <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code> has been set.
1795 *
1796 * @param pBiDi is the paragraph <code>UBiDi</code> object.
1797 *
1798 * @return The length of the reordered text resulting from
1799 *         the last call to <code>ubidi_setPara()</code>.
1800 * @see ubidi_setPara
1801 * @see UBIDI_OPTION_INSERT_MARKS
1802 * @see UBIDI_OPTION_REMOVE_CONTROLS
1803 * @stable ICU 3.6
1804 */
1805U_STABLE int32_t U_EXPORT2
1806ubidi_getResultLength(const UBiDi *pBiDi);
1807
1808U_CDECL_BEGIN
1809/**
1810 * value returned by <code>UBiDiClassCallback</code> callbacks when
1811 * there is no need to override the standard Bidi class for a given code point.
1812 * @see UBiDiClassCallback
1813 * @stable ICU 3.6
1814 */
1815#define U_BIDI_CLASS_DEFAULT  U_CHAR_DIRECTION_COUNT
1816
1817/**
1818 * Callback type declaration for overriding default Bidi class values with
1819 * custom ones.
1820 * <p>Usually, the function pointer will be propagated to a <code>UBiDi</code>
1821 * object by calling the <code>ubidi_setClassCallback()</code> function;
1822 * then the callback will be invoked by the Unicode Bidirectional Algorithm
1823 * (UBA) implementation any time the class of a character is to be determined.</p>
1824 *
1825 * @param context is a pointer to the callback private data.
1826 *
1827 * @param c       is the code point to get a Bidi class for.
1828 *
1829 * @return The directional property / Bidi class for the given code point
1830 *         <code>c</code> if the default class has been overridden, or
1831 *         <code>#U_BIDI_CLASS_DEFAULT</code> if the standard Bidi class value
1832 *         for <code>c</code> is to be used.
1833 * @see ubidi_setClassCallback
1834 * @see ubidi_getClassCallback
1835 * @stable ICU 3.6
1836 */
1837typedef UCharDirection U_CALLCONV
1838UBiDiClassCallback(const void *context, UChar32 c);
1839
1840U_CDECL_END
1841
1842/**
1843 * Retrieve the Bidi class for a given code point.
1844 * <p>If a <code>#UBiDiClassCallback</code> callback is defined and returns a
1845 * value other than <code>#U_BIDI_CLASS_DEFAULT</code>, that value is used;
1846 * otherwise the default class determination mechanism is invoked.</p>
1847 *
1848 * @param pBiDi is the paragraph <code>UBiDi</code> object.
1849 *
1850 * @param c     is the code point whose Bidi class must be retrieved.
1851 *
1852 * @return The Bidi class for character <code>c</code> as seen by the given
1853 *         <code>pBiDi</code> instance, i.e. after applying any class callback.
1854 * @see UBiDiClassCallback
1855 * @stable ICU 3.6
1856 */
1857U_STABLE UCharDirection U_EXPORT2
1858ubidi_getCustomizedClass(UBiDi *pBiDi, UChar32 c);
1859
1860/**
1861 * Set the callback function and callback data used by the Unicode
1862 * Bidirectional Algorithm (UBA) implementation for Bidi class determination.
1863 * <p>This may be useful for assigning Bidi classes to PUA characters, or
1864 * for special application needs. For instance, an application may want to
1865 * handle all spaces like L or R characters (according to the base direction)
1866 * when creating the visual ordering of logical lines which are part of a report
1867 * organized in columns: there should not be interaction between adjacent
1868 * cells.</p>
1869 *
1870 * @param pBiDi is the paragraph <code>UBiDi</code> object.
1871 *
1872 * @param newFn is the new callback function pointer.
1873 *
1874 * @param newContext is the new callback context pointer. This can be NULL.
1875 *
1876 * @param oldFn fillin: Returns the old callback function pointer. This can be
1877 *                      NULL.
1878 *
1879 * @param oldContext fillin: Returns the old callback's context. This can be
1880 *                           NULL.
1881 *
1882 * @param pErrorCode must be a valid pointer to an error code value.
1883 *
1884 * @see ubidi_getClassCallback
1885 * @stable ICU 3.6
1886 */
1887U_STABLE void U_EXPORT2
1888ubidi_setClassCallback(UBiDi *pBiDi, UBiDiClassCallback *newFn,
1889                       const void *newContext, UBiDiClassCallback **oldFn,
1890                       const void **oldContext, UErrorCode *pErrorCode);
1891
1892/**
1893 * Get the current callback function used for Bidi class determination.
1894 *
1895 * @param pBiDi is the paragraph <code>UBiDi</code> object.
1896 *
1897 * @param fn fillin: Receives the current callback function pointer.
1898 *
1899 * @param context fillin: Receives the current callback's private context.
1900 *
1901 * @see ubidi_setClassCallback
1902 * @stable ICU 3.6
1903 */
1904U_STABLE void U_EXPORT2
1905ubidi_getClassCallback(UBiDi *pBiDi, UBiDiClassCallback **fn, const void **context);
1906
1907/**
1908 * Take a <code>UBiDi</code> object containing the reordering
1909 * information for a piece of text (one or more paragraphs) set by
1910 * <code>ubidi_setPara()</code> or for a line of text set by
1911 * <code>ubidi_setLine()</code> and write a reordered string to the
1912 * destination buffer.
1913 *
1914 * This function preserves the integrity of characters with multiple
1915 * code units and (optionally) combining characters.
1916 * Characters in RTL runs can be replaced by mirror-image characters
1917 * in the destination buffer. Note that "real" mirroring has
1918 * to be done in a rendering engine by glyph selection
1919 * and that for many "mirrored" characters there are no
1920 * Unicode characters as mirror-image equivalents.
1921 * There are also options to insert or remove Bidi control
1922 * characters; see the description of the <code>destSize</code>
1923 * and <code>options</code> parameters and of the option bit flags.
1924 *
1925 * @param pBiDi A pointer to a <code>UBiDi</code> object that
1926 *              is set by <code>ubidi_setPara()</code> or
1927 *              <code>ubidi_setLine()</code> and contains the reordering
1928 *              information for the text that it was defined for,
1929 *              as well as a pointer to that text.<br><br>
1930 *              The text was aliased (only the pointer was stored
1931 *              without copying the contents) and must not have been modified
1932 *              since the <code>ubidi_setPara()</code> call.
1933 *
1934 * @param dest A pointer to where the reordered text is to be copied.
1935 *             The source text and <code>dest[destSize]</code>
1936 *             must not overlap.
1937 *
1938 * @param destSize The size of the <code>dest</code> buffer,
1939 *                 in number of UChars.
1940 *                 If the <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code>
1941 *                 option is set, then the destination length could be
1942 *                 as large as
1943 *                 <code>ubidi_getLength(pBiDi)+2*ubidi_countRuns(pBiDi)</code>.
1944 *                 If the <code>#UBIDI_REMOVE_BIDI_CONTROLS</code> option
1945 *                 is set, then the destination length may be less than
1946 *                 <code>ubidi_getLength(pBiDi)</code>.
1947 *                 If none of these options is set, then the destination length
1948 *                 will be exactly <code>ubidi_getProcessedLength(pBiDi)</code>.
1949 *
1950 * @param options A bit set of options for the reordering that control
1951 *                how the reordered text is written.
1952 *                The options include mirroring the characters on a code
1953 *                point basis and inserting LRM characters, which is used
1954 *                especially for transforming visually stored text
1955 *                to logically stored text (although this is still an
1956 *                imperfect implementation of an "inverse Bidi" algorithm
1957 *                because it uses the "forward Bidi" algorithm at its core).
1958 *                The available options are:
1959 *                <code>#UBIDI_DO_MIRRORING</code>,
1960 *                <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code>,
1961 *                <code>#UBIDI_KEEP_BASE_COMBINING</code>,
1962 *                <code>#UBIDI_OUTPUT_REVERSE</code>,
1963 *                <code>#UBIDI_REMOVE_BIDI_CONTROLS</code>
1964 *
1965 * @param pErrorCode must be a valid pointer to an error code value.
1966 *
1967 * @return The length of the output string.
1968 *
1969 * @see ubidi_getProcessedLength
1970 * @stable ICU 2.0
1971 */
1972U_STABLE int32_t U_EXPORT2
1973ubidi_writeReordered(UBiDi *pBiDi,
1974                     UChar *dest, int32_t destSize,
1975                     uint16_t options,
1976                     UErrorCode *pErrorCode);
1977
1978/**
1979 * Reverse a Right-To-Left run of Unicode text.
1980 *
1981 * This function preserves the integrity of characters with multiple
1982 * code units and (optionally) combining characters.
1983 * Characters can be replaced by mirror-image characters
1984 * in the destination buffer. Note that "real" mirroring has
1985 * to be done in a rendering engine by glyph selection
1986 * and that for many "mirrored" characters there are no
1987 * Unicode characters as mirror-image equivalents.
1988 * There is also an option to remove Bidi control
1989 * characters.
1990 *
1991 * This function is the implementation for reversing RTL runs as part
1992 * of <code>ubidi_writeReordered()</code>. For detailed descriptions
1993 * of the parameters, see there.
1994 * Since no Bidi controls are inserted here, the output string length
1995 * will never exceed <code>srcLength</code>.
1996 *
1997 * @see ubidi_writeReordered
1998 *
1999 * @param src A pointer to the RTL run text.
2000 *
2001 * @param srcLength The length of the RTL run.
2002 *
2003 * @param dest A pointer to where the reordered text is to be copied.
2004 *             <code>src[srcLength]</code> and <code>dest[destSize]</code>
2005 *             must not overlap.
2006 *
2007 * @param destSize The size of the <code>dest</code> buffer,
2008 *                 in number of UChars.
2009 *                 If the <code>#UBIDI_REMOVE_BIDI_CONTROLS</code> option
2010 *                 is set, then the destination length may be less than
2011 *                 <code>srcLength</code>.
2012 *                 If this option is not set, then the destination length
2013 *                 will be exactly <code>srcLength</code>.
2014 *
2015 * @param options A bit set of options for the reordering that control
2016 *                how the reordered text is written.
2017 *                See the <code>options</code> parameter in <code>ubidi_writeReordered()</code>.
2018 *
2019 * @param pErrorCode must be a valid pointer to an error code value.
2020 *
2021 * @return The length of the output string.
2022 * @stable ICU 2.0
2023 */
2024U_STABLE int32_t U_EXPORT2
2025ubidi_writeReverse(const UChar *src, int32_t srcLength,
2026                   UChar *dest, int32_t destSize,
2027                   uint16_t options,
2028                   UErrorCode *pErrorCode);
2029
2030/*#define BIDI_SAMPLE_CODE*/
2031/*@}*/
2032
2033#endif
2034