1/*
2******************************************************************************
3*
4* Copyright (C) 2016 and later: Unicode, Inc. and others.
5* License & terms of use: http://www.unicode.org/copyright.html
6*
7******************************************************************************
8*   file name:  ubiditransform.c
9*   encoding:   US-ASCII
10*   tab size:   8 (not used)
11*   indentation:4
12*
13*   created on: 2016jul24
14*   created by: Lina Kemmel
15*
16*/
17
18#include "cmemory.h"
19#include "unicode/ubidi.h"
20#include "unicode/ustring.h"
21#include "unicode/ushape.h"
22#include "unicode/utf16.h"
23#include "ustr_imp.h"
24#include "unicode/ubiditransform.h"
25
/*
 * Short aliases for the UBiDi direction/order constants and for the Arabic
 * shaping text-direction flags. They keep the rows of the reordering scheme
 * table in this file compact.
 */
#define LTR                     UBIDI_LTR
#define RTL                     UBIDI_RTL
#define LOGICAL                 UBIDI_LOGICAL
#define VISUAL                  UBIDI_VISUAL
#define SHAPE_LOGICAL           U_SHAPE_TEXT_DIRECTION_LOGICAL
#define SHAPE_VISUAL            U_SHAPE_TEXT_DIRECTION_VISUAL_LTR
33
/*
 * Validates and normalizes a caller-supplied length. Must be used inside a
 * function that returns an integer, because it may return from it:
 *   - LEN == 0  : the enclosing function returns 0;
 *   - LEN < -1  : *ERROR is set to U_ILLEGAL_ARGUMENT_ERROR and the enclosing
 *                 function returns 0;
 *   - LEN == -1 : LEN is replaced by u_strlen(STR).
 * LEN must be a modifiable lvalue and is evaluated more than once, so it must
 * not have side effects. The do/while(0) wrapper makes the expansion a single
 * statement that is safe in unbraced if/else bodies.
 */
#define CHECK_LEN(STR, LEN, ERROR) do { \
        if ((LEN) == 0) return 0; \
        if ((LEN) < -1) { *(ERROR) = U_ILLEGAL_ARGUMENT_ERROR; return 0; } \
        if ((LEN) == -1) (LEN) = u_strlen(STR); \
    } while (0)
39
/* Number of slots in ReorderingScheme.actions, including the NULL entry that
   terminates each action list. */
#define MAX_ACTIONS     7
41
/**
 * Pointer to a function that performs one step of a transformation (such as
 * reordering, setting "inverse" mode, character mirroring, etc.) on the given
 * <code>UBiDiTransform</code>, reporting problems through the
 * <code>UErrorCode</code> argument. The return value indicates whether the
 * step changed the text (i.e. wrote a new result to the destination buffer)
 * or not.
 */
typedef UBool (*UBiDiAction)(UBiDiTransform *, UErrorCode *);
49
/**
 * Structure that holds a predefined reordering scheme, including the following
 * information:
 * <ul>
 * <li>an input base direction,</li>
 * <li>an input order,</li>
 * <li>an output base direction,</li>
 * <li>an output order,</li>
 * <li>a digit shaping direction,</li>
 * <li>a letter shaping direction,</li>
 * <li>a base direction that should be applied when the reordering engine is
 *     invoked (which can not always be derived from the caller-defined
 *     options),</li>
 * <li>a NULL-terminated array of pointers to functions that accomplish the
 *     bidi layout transformation; the functions are invoked in array
 *     order.</li>
 * </ul>
 * The first four fields form the key used to look a scheme up.
 */
typedef struct {
    UBiDiLevel        inLevel;               /* input level */
    UBiDiOrder        inOrder;               /* input order */
    UBiDiLevel        outLevel;              /* output level */
    UBiDiOrder        outOrder;              /* output order */
    uint32_t          digitsDir;             /* digit shaping direction */
    uint32_t          lettersDir;            /* letter shaping direction */
    UBiDiLevel        baseLevel;             /* paragraph level to be used with setPara */
    const UBiDiAction actions[MAX_ACTIONS];  /* array of pointers to functions carrying out the transformation */
} ReorderingScheme;
77
/*
 * State of a bidi transformation. pBidi and src are allocated on demand by
 * this file and released by ubiditransform_close(); dest and pDestLength refer
 * to storage supplied for the duration of a single transform call and are
 * reset when that call finishes.
 */
struct UBiDiTransform {
    UBiDi                   *pBidi;             /* pointer to a UBiDi object */
    const ReorderingScheme  *pActiveScheme;     /* effective reordering scheme */
    UChar                   *src;               /* working copy of the input text for the current step */
    UChar                   *dest;              /* output text */
    uint32_t                srcLength;          /* input text length - not really needed as we are zero-terminated and can u_strlen */
    uint32_t                srcSize;            /* allocated capacity of src, in UChars */
    uint32_t                destSize;           /* output text capacity */
    uint32_t                *pDestLength;       /* number of UChars written to dest */
    uint32_t                reorderingOptions;  /* reordering options - currently only support DO_MIRRORING */
    uint32_t                digits;             /* digit option for ArabicShaping */
    uint32_t                letters;            /* letter option for ArabicShaping */
};
91
92U_DRAFT UBiDiTransform* U_EXPORT2
93ubiditransform_open(UErrorCode *pErrorCode)
94{
95    UBiDiTransform *pBiDiTransform = NULL;
96    if (U_SUCCESS(*pErrorCode)) {
97        pBiDiTransform = (UBiDiTransform*) uprv_calloc(1, sizeof(UBiDiTransform));
98        if (pBiDiTransform == NULL) {
99            *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
100        }
101    }
102    return pBiDiTransform;
103}
104
105U_DRAFT void U_EXPORT2
106ubiditransform_close(UBiDiTransform *pBiDiTransform)
107{
108    if (pBiDiTransform != NULL) {
109        if (pBiDiTransform->pBidi != NULL) {
110            ubidi_close(pBiDiTransform->pBidi);
111        }
112        if (pBiDiTransform->src != NULL) {
113            uprv_free(pBiDiTransform->src);
114        }
115        uprv_free(pBiDiTransform);
116    }
117}
118
119/**
120 * Performs Bidi resolution of text.
121 *
122 * @param pTransform Pointer to the <code>UBiDiTransform</code> structure.
123 * @param pErrorCode Pointer to the error code value.
124 *
125 * @return Whether or not this function modifies the text. Besides the return
126 * value, the caller should also check <code>U_SUCCESS(*pErrorCode)</code>.
127 */
128static UBool
129action_resolve(UBiDiTransform *pTransform, UErrorCode *pErrorCode)
130{
131    ubidi_setPara(pTransform->pBidi, pTransform->src, pTransform->srcLength,
132            pTransform->pActiveScheme->baseLevel, NULL, pErrorCode);
133    return FALSE;
134}
135
136/**
137 * Performs basic reordering of text (Logical -> Visual LTR).
138 *
139 * @param pTransform Pointer to the <code>UBiDiTransform</code> structure.
140 * @param pErrorCode Pointer to the error code value.
141 *
142 * @return Whether or not this function modifies the text. Besides the return
143 * value, the caller should also check <code>U_SUCCESS(*pErrorCode)</code>.
144 */
145static UBool
146action_reorder(UBiDiTransform *pTransform, UErrorCode *pErrorCode)
147{
148    ubidi_writeReordered(pTransform->pBidi, pTransform->dest, pTransform->destSize,
149            pTransform->reorderingOptions, pErrorCode);
150
151    *pTransform->pDestLength = pTransform->srcLength;
152    pTransform->reorderingOptions = UBIDI_REORDER_DEFAULT;
153    return TRUE;
154}
155
156/**
157 * Sets "inverse" mode on the <code>UBiDi</code> object.
158 *
159 * @param pTransform Pointer to the <code>UBiDiTransform</code> structure.
160 * @param pErrorCode Pointer to the error code value.
161 *
162 * @return Whether or not this function modifies the text. Besides the return
163 * value, the caller should also check <code>U_SUCCESS(*pErrorCode)</code>.
164 */
165static UBool
166action_setInverse(UBiDiTransform *pTransform, UErrorCode *pErrorCode)
167{
168    ubidi_setInverse(pTransform->pBidi, TRUE);
169    ubidi_setReorderingMode(pTransform->pBidi, UBIDI_REORDER_INVERSE_LIKE_DIRECT);
170    return FALSE;
171}
172
173/**
174 * Sets "runs only" reordering mode indicating a Logical LTR <-> Logical RTL
175 * transformation.
176 *
177 * @param pTransform Pointer to the <code>UBiDiTransform</code> structure.
178 * @param pErrorCode Pointer to the error code value.
179 *
180 * @return Whether or not this function modifies the text. Besides the return
181 * value, the caller should also check <code>U_SUCCESS(*pErrorCode)</code>.
182 */
183static UBool
184action_setRunsOnly(UBiDiTransform *pTransform, UErrorCode *pErrorCode)
185{
186    ubidi_setReorderingMode(pTransform->pBidi, UBIDI_REORDER_RUNS_ONLY);
187    return FALSE;
188}
189
190/**
191 * Performs string reverse.
192 *
193 * @param pTransform Pointer to the <code>UBiDiTransform</code> structure.
194 * @param pErrorCode Pointer to the error code value.
195 *
196 * @return Whether or not this function modifies the text. Besides the return
197 * value, the caller should also check <code>U_SUCCESS(*pErrorCode)</code>.
198 */
199static UBool
200action_reverse(UBiDiTransform *pTransform, UErrorCode *pErrorCode)
201{
202    ubidi_writeReverse(pTransform->src, pTransform->srcLength,
203            pTransform->dest, pTransform->destSize,
204            UBIDI_REORDER_DEFAULT, pErrorCode);
205    *pTransform->pDestLength = pTransform->srcLength;
206    return TRUE;
207}
208
209/**
210 * Applies a new value to the text that serves as input at the current
211 * processing step. This value is identical to the original one when we begin
212 * the processing, but usually changes as the transformation progresses.
213 *
214 * @param pTransform A pointer to the <code>UBiDiTransform</code> structure.
215 * @param newSrc A pointer whose value is to be used as input text.
216 * @param newLength A length of the new text in <code>UChar</code>s.
217 * @param newSize A new source capacity in <code>UChar</code>s.
218 * @param pErrorCode Pointer to the error code value.
219 */
220static void
221updateSrc(UBiDiTransform *pTransform, const UChar *newSrc, uint32_t newLength,
222        uint32_t newSize, UErrorCode *pErrorCode)
223{
224    if (newSize < newLength) {
225        *pErrorCode = U_BUFFER_OVERFLOW_ERROR;
226        return;
227    }
228    if (newSize > pTransform->srcSize) {
229        newSize += 50; // allocate slightly more than needed right now
230        if (pTransform->src != NULL) {
231            uprv_free(pTransform->src);
232            pTransform->src = NULL;
233        }
234        pTransform->src = (UChar *)uprv_malloc(newSize * sizeof(UChar));
235        if (pTransform->src == NULL) {
236            *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
237            //pTransform->srcLength = pTransform->srcSize = 0;
238            return;
239        }
240        pTransform->srcSize = newSize;
241    }
242    u_strncpy(pTransform->src, newSrc, newLength);
243    pTransform->srcLength = u_terminateUChars(pTransform->src,
244    		pTransform->srcSize, newLength, pErrorCode);
245}
246
247/**
248 * Calls a lower level shaping function.
249 *
250 * @param pTransform Pointer to the <code>UBiDiTransform</code> structure.
251 * @param options Shaping options.
252 * @param pErrorCode Pointer to the error code value.
253 */
254static void
255doShape(UBiDiTransform *pTransform, uint32_t options, UErrorCode *pErrorCode)
256{
257    *pTransform->pDestLength = u_shapeArabic(pTransform->src,
258            pTransform->srcLength, pTransform->dest, pTransform->destSize,
259            options, pErrorCode);
260}
261
262/**
263 * Performs digit and letter shaping.
264 *
265 * @param pTransform Pointer to the <code>UBiDiTransform</code> structure.
266 * @param pErrorCode Pointer to the error code value.
267 *
268 * @return Whether or not this function modifies the text. Besides the return
269 * value, the caller should also check <code>U_SUCCESS(*pErrorCode)</code>.
270 */
271static UBool
272action_shapeArabic(UBiDiTransform *pTransform, UErrorCode *pErrorCode)
273{
274    if ((pTransform->letters | pTransform->digits) == 0) {
275        return FALSE;
276    }
277    if (pTransform->pActiveScheme->lettersDir == pTransform->pActiveScheme->digitsDir) {
278        doShape(pTransform, pTransform->letters | pTransform->digits | pTransform->pActiveScheme->lettersDir,
279                pErrorCode);
280    } else {
281        doShape(pTransform, pTransform->digits | pTransform->pActiveScheme->digitsDir, pErrorCode);
282        if (U_SUCCESS(*pErrorCode)) {
283            updateSrc(pTransform, pTransform->dest, *pTransform->pDestLength,
284                    *pTransform->pDestLength, pErrorCode);
285            doShape(pTransform, pTransform->letters | pTransform->pActiveScheme->lettersDir,
286                    pErrorCode);
287        }
288    }
289    return TRUE;
290}
291
292/**
293 * Performs character mirroring.
294 *
295 * @param pTransform Pointer to the <code>UBiDiTransform</code> structure.
296 * @param pErrorCode Pointer to the error code value.
297 *
298 * @return Whether or not this function modifies the text. Besides the return
299 * value, the caller should also check <code>U_SUCCESS(*pErrorCode)</code>.
300 */
301static UBool
302action_mirror(UBiDiTransform *pTransform, UErrorCode *pErrorCode)
303{
304    UChar32 c;
305    uint32_t i = 0, j = 0;
306    if (0 == (pTransform->reorderingOptions & UBIDI_DO_MIRRORING)) {
307        return FALSE;
308    }
309    if (pTransform->destSize < pTransform->srcLength) {
310        *pErrorCode = U_BUFFER_OVERFLOW_ERROR;
311        return FALSE;
312    }
313    do {
314        UBool isOdd = ubidi_getLevelAt(pTransform->pBidi, i) & 1;
315        U16_NEXT(pTransform->src, i, pTransform->srcLength, c);
316        U16_APPEND_UNSAFE(pTransform->dest, j, isOdd ? u_charMirror(c) : c);
317    } while (i < pTransform->srcLength);
318
319    *pTransform->pDestLength = pTransform->srcLength;
320    pTransform->reorderingOptions = UBIDI_REORDER_DEFAULT;
321    return TRUE;
322}
323
/**
 * All possible reordering schemes.
 *
 * Each row lists, in order: input level, input order, output level, output
 * order, digit shaping direction, letter shaping direction, the paragraph
 * level used for Bidi resolution, and the NULL-terminated list of actions.
 * The actions of the selected row are executed in the order given, so the
 * position of each action within its list is significant.
 */
static const ReorderingScheme Schemes[] =
{
    /* 0: Logical LTR => Visual LTR */
    {LTR, LOGICAL, LTR, VISUAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR,
            {action_shapeArabic, action_resolve, action_reorder, NULL}},
    /* 1: Logical RTL => Visual LTR */
    {RTL, LOGICAL, LTR, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, RTL,
            {action_resolve, action_reorder, action_shapeArabic, NULL}},
    /* 2: Logical LTR => Visual RTL */
    {LTR, LOGICAL, RTL, VISUAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR,
            {action_shapeArabic, action_resolve, action_reorder, action_reverse, NULL}},
    /* 3: Logical RTL => Visual RTL */
    {RTL, LOGICAL, RTL, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, RTL,
            {action_resolve, action_reorder, action_shapeArabic, action_reverse, NULL}},
    /* 4: Visual LTR => Logical RTL */
    {LTR, VISUAL, RTL, LOGICAL, SHAPE_LOGICAL, SHAPE_VISUAL, RTL,
            {action_shapeArabic, action_setInverse, action_resolve, action_reorder, NULL}},
    /* 5: Visual RTL => Logical RTL */
    {RTL, VISUAL, RTL, LOGICAL, SHAPE_LOGICAL, SHAPE_VISUAL, RTL,
            {action_reverse, action_shapeArabic, action_setInverse, action_resolve, action_reorder, NULL}},
    /* 6: Visual LTR => Logical LTR */
    {LTR, VISUAL, LTR, LOGICAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR,
            {action_setInverse, action_resolve, action_reorder, action_shapeArabic, NULL}},
    /* 7: Visual RTL => Logical LTR */
    {RTL, VISUAL, LTR, LOGICAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR,
            {action_reverse, action_setInverse, action_resolve, action_reorder, action_shapeArabic, NULL}},
    /* 8: Logical LTR => Logical RTL */
    {LTR, LOGICAL, RTL, LOGICAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR,
            {action_shapeArabic, action_resolve, action_mirror, action_setRunsOnly, action_resolve, action_reorder, NULL}},
    /* 9: Logical RTL => Logical LTR */
    {RTL, LOGICAL, LTR, LOGICAL, SHAPE_LOGICAL, SHAPE_LOGICAL, RTL,
            {action_resolve, action_mirror, action_setRunsOnly, action_resolve, action_reorder, action_shapeArabic, NULL}},
    /* 10: Visual LTR => Visual RTL */
    {LTR, VISUAL, RTL, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, LTR,
            {action_shapeArabic, action_setInverse, action_resolve, action_mirror, action_reverse, NULL}},
    /* 11: Visual RTL => Visual LTR */
    {RTL, VISUAL, LTR, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, LTR,
            {action_reverse, action_shapeArabic, action_setInverse, action_resolve, action_mirror, NULL}},
    /* 12: Logical LTR => Logical LTR */
    {LTR, LOGICAL, LTR, LOGICAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR,
            {action_resolve, action_mirror, action_shapeArabic, NULL}},
    /* 13: Logical RTL => Logical RTL */
    {RTL, LOGICAL, RTL, LOGICAL, SHAPE_VISUAL, SHAPE_LOGICAL, RTL,
            {action_resolve, action_mirror, action_shapeArabic, NULL}},
    /* 14: Visual LTR => Visual LTR */
    {LTR, VISUAL, LTR, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, LTR,
            {action_resolve, action_mirror, action_shapeArabic, NULL}},
    /* 15: Visual RTL => Visual RTL */
    {RTL, VISUAL, RTL, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, LTR,
            {action_reverse, action_resolve, action_mirror, action_shapeArabic, action_reverse, NULL}}
};

/* Number of rows in the Schemes table. */
static const uint32_t nSchemes = sizeof(Schemes) / sizeof(*Schemes);
381
382/**
383 * When the direction option is <code>UBIDI_DEFAULT_LTR</code> or
384 * <code>UBIDI_DEFAULT_RTL</code>, resolve the base direction according to that
385 * of the first strong bidi character.
386 */
387static void
388resolveBaseDirection(const UChar *text, uint32_t length,
389        UBiDiLevel *pInLevel, UBiDiLevel *pOutLevel)
390{
391    switch (*pInLevel) {
392        case UBIDI_DEFAULT_LTR:
393        case UBIDI_DEFAULT_RTL: {
394            UBiDiLevel level = ubidi_getBaseDirection(text, length);
395            *pInLevel = level != UBIDI_NEUTRAL ? level
396                    : *pInLevel == UBIDI_DEFAULT_RTL ? RTL : LTR;
397            break;
398        }
399        default:
400            *pInLevel &= 1;
401            break;
402    }
403    switch (*pOutLevel) {
404        case UBIDI_DEFAULT_LTR:
405        case UBIDI_DEFAULT_RTL:
406            *pOutLevel = *pInLevel;
407            break;
408        default:
409            *pOutLevel &= 1;
410            break;
411    }
412}
413
414/**
415 * Finds a valid <code>ReorderingScheme</code> matching the
416 * caller-defined scheme.
417 *
418 * @return A valid <code>ReorderingScheme</code> object or NULL
419 */
420static const ReorderingScheme*
421findMatchingScheme(UBiDiLevel inLevel, UBiDiLevel outLevel,
422        UBiDiOrder inOrder, UBiDiOrder outOrder)
423{
424    uint32_t i;
425    for (i = 0; i < nSchemes; i++) {
426        const ReorderingScheme *pScheme = Schemes + i;
427        if (inLevel == pScheme->inLevel && outLevel == pScheme->outLevel
428                && inOrder == pScheme->inOrder && outOrder == pScheme->outOrder) {
429            return pScheme;
430        }
431    }
432    return NULL;
433}
434
435U_DRAFT uint32_t U_EXPORT2
436ubiditransform_transform(UBiDiTransform *pBiDiTransform,
437            const UChar *src, int32_t srcLength,
438            UChar *dest, int32_t destSize,
439            UBiDiLevel inParaLevel, UBiDiOrder inOrder,
440            UBiDiLevel outParaLevel, UBiDiOrder outOrder,
441            UBiDiMirroring doMirroring, uint32_t shapingOptions,
442            UErrorCode *pErrorCode)
443{
444    uint32_t destLength = 0;
445    UBool textChanged = FALSE;
446    const UBiDiTransform *pOrigTransform = pBiDiTransform;
447    const UBiDiAction *action = NULL;
448
449    if (U_FAILURE(*pErrorCode)) {
450        return 0;
451    }
452    if (src == NULL || dest == NULL) {
453        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
454        return 0;
455    }
456    CHECK_LEN(src, srcLength, pErrorCode);
457    CHECK_LEN(dest, destSize, pErrorCode);
458
459    if (pBiDiTransform == NULL) {
460        pBiDiTransform = ubiditransform_open(pErrorCode);
461        if (U_FAILURE(*pErrorCode)) {
462            return 0;
463        }
464    }
465    /* Current limitation: in multiple paragraphs will be resolved according
466       to the 1st paragraph */
467    resolveBaseDirection(src, srcLength, &inParaLevel, &outParaLevel);
468
469    pBiDiTransform->pActiveScheme = findMatchingScheme(inParaLevel, outParaLevel,
470            inOrder, outOrder);
471    if (pBiDiTransform->pActiveScheme == NULL) {
472        goto cleanup;
473    }
474    pBiDiTransform->reorderingOptions = doMirroring ? UBIDI_DO_MIRRORING
475            : UBIDI_REORDER_DEFAULT;
476
477    /* Ignore TEXT_DIRECTION_* flags, as we apply our own depending on the text
478       scheme at the time shaping is invoked. */
479    shapingOptions &= ~U_SHAPE_TEXT_DIRECTION_MASK;
480    pBiDiTransform->digits = shapingOptions & ~U_SHAPE_LETTERS_MASK;
481    pBiDiTransform->letters = shapingOptions & ~U_SHAPE_DIGITS_MASK;
482
483    updateSrc(pBiDiTransform, src, srcLength, destSize > srcLength ? destSize : srcLength, pErrorCode);
484    if (U_FAILURE(*pErrorCode)) {
485        goto cleanup;
486    }
487    if (pBiDiTransform->pBidi == NULL) {
488        pBiDiTransform->pBidi = ubidi_openSized(0, 0, pErrorCode);
489        if (U_FAILURE(*pErrorCode)) {
490            goto cleanup;
491        }
492    }
493    pBiDiTransform->dest = dest;
494    pBiDiTransform->destSize = destSize;
495    pBiDiTransform->pDestLength = &destLength;
496
497    /* Checking for U_SUCCESS() within the loop to bail out on first failure. */
498    for (action = pBiDiTransform->pActiveScheme->actions; *action && U_SUCCESS(*pErrorCode); action++) {
499        if ((*action)(pBiDiTransform, pErrorCode)) {
500            if (action + 1) {
501                updateSrc(pBiDiTransform, pBiDiTransform->dest, *pBiDiTransform->pDestLength,
502                        *pBiDiTransform->pDestLength, pErrorCode);
503            }
504            textChanged = TRUE;
505        }
506    }
507    ubidi_setInverse(pBiDiTransform->pBidi, FALSE);
508
509    if (!textChanged && U_SUCCESS(*pErrorCode)) {
510        /* Text was not changed - just copy src to dest */
511        if (destSize < srcLength) {
512            *pErrorCode = U_BUFFER_OVERFLOW_ERROR;
513        } else {
514            u_strncpy(dest, src, srcLength);
515            destLength = srcLength;
516        }
517    }
518cleanup:
519    if (pOrigTransform != pBiDiTransform) {
520        ubiditransform_close(pBiDiTransform);
521    } else {
522        pBiDiTransform->dest = NULL;
523        pBiDiTransform->pDestLength = NULL;
524        pBiDiTransform->srcLength = 0;
525        pBiDiTransform->destSize = 0;
526    }
527    return U_FAILURE(*pErrorCode) ? 0 : destLength;
528}
529