1/* GENERATED SOURCE. DO NOT MODIFY. */
2// © 2016 and later: Unicode, Inc. and others.
3// License & terms of use: http://www.unicode.org/copyright.html#License
4/*
5*******************************************************************************
6*   Copyright (C) 2001-2012, International Business Machines
7*   Corporation and others.  All Rights Reserved.
8*******************************************************************************
9*/
10
11package android.icu.text;
12
13import android.icu.impl.UBiDiProps;
14import android.icu.lang.UCharacterDirection;
15
16/**
17 * Shape Arabic text on a character basis.
18 *
19 * <p>ArabicShaping performs basic operations for "shaping" Arabic text. It is most
20 * useful for use with legacy data formats and legacy display technology
21 * (simple terminals). All operations are performed on Unicode characters.</p>
22 *
23 * <p>Text-based shaping means that some character code points in the text are
24 * replaced by others depending on the context. It transforms one kind of text
25 * into another. In comparison, modern displays for Arabic text select
26 * appropriate, context-dependent font glyphs for each text element, which means
27 * that they transform text into a glyph vector.</p>
28 *
29 * <p>Text transformations are necessary when modern display technology is not
30 * available or when text needs to be transformed to or from legacy formats that
31 * use "shaped" characters. Since the Arabic script is cursive, connecting
32 * adjacent letters to each other, computers select images for each letter based
33 * on the surrounding letters. This usually results in four images per Arabic
34 * letter: initial, middle, final, and isolated forms. In Unicode, on the other
35 * hand, letters are normally stored abstract, and a display system is expected
36 * to select the necessary glyphs. (This makes searching and other text
37 * processing easier because the same letter has only one code.) It is possible
38 * to mimic this with text transformations because there are characters in
39 * Unicode that are rendered as letters with a specific shape
40 * (or cursive connectivity). They were included for interoperability with
41 * legacy systems and codepages, and for unsophisticated display systems.</p>
42 *
 * <p>A second kind of text transformation is supported for Arabic digits:
44 * For compatibility with legacy codepages that only include European digits,
45 * it is possible to replace one set of digits by another, changing the
46 * character code points. These operations can be performed for either
47 * Arabic-Indic Digits (U+0660...U+0669) or Eastern (Extended) Arabic-Indic
48 * digits (U+06f0...U+06f9).</p>
49 *
50 * <p>Some replacements may result in more or fewer characters (code points).
51 * By default, this means that the destination buffer may receive text with a
52 * length different from the source length. Some legacy systems rely on the
53 * length of the text to be constant. They expect extra spaces to be added
54 * or consumed either next to the affected character or at the end of the
55 * text.</p>
56 * @hide Only a subset of ICU is exposed in Android
57 */
58public final class ArabicShaping {
    // Option bits passed to the constructor; the cached values below are derived from them there.
    private final int options;
    // True when (options & TEXT_DIRECTION_MASK) == TEXT_DIRECTION_LOGICAL.
    private boolean isLogical; // convenience
    // True when (options & SPACES_RELATIVE_TO_TEXT_MASK) == SPACES_RELATIVE_TO_TEXT_BEGIN_END.
    private boolean spacesRelativeToTextBeginEnd;
    // NEW_TAIL_CHAR when SHAPE_TAIL_NEW_UNICODE is set in the options, otherwise OLD_TAIL_CHAR.
    private char tailChar;
63
64    /**
65     * Convert a range of text in the source array, putting the result
66     * into a range of text in the destination array, and return the number
67     * of characters written.
68     *
69     * @param source An array containing the input text
70     * @param sourceStart The start of the range of text to convert
71     * @param sourceLength The length of the range of text to convert
72     * @param dest The destination array that will receive the result.
     *   It may be <code>null</code> only if <code>destSize</code> is 0.
74     * @param destStart The start of the range of the destination buffer to use.
75     * @param destSize The size (capacity) of the destination buffer.
76     *   If <code>destSize</code> is 0, then no output is produced,
77     *   but the necessary buffer size is returned ("preflighting").  This
78     *   does not validate the text against the options, for example,
79     *   if letters are being unshaped, and spaces are being consumed
80     *   following lamalef, this will not detect a lamalef without a
81     *   corresponding space.  An error will be thrown when the actual
82     *   conversion is attempted.
83     * @return The number of chars written to the destination buffer.
84     *   If an error occurs, then no output was written, or it may be
85     *   incomplete.
86     * @throws ArabicShapingException if the text cannot be converted according to the options.
87     */
88    public int shape(char[] source, int sourceStart, int sourceLength,
89                     char[] dest, int destStart, int destSize) throws ArabicShapingException {
90        if (source == null) {
91            throw new IllegalArgumentException("source can not be null");
92        }
93        if (sourceStart < 0 || sourceLength < 0 || sourceStart + sourceLength > source.length) {
94            throw new IllegalArgumentException("bad source start (" + sourceStart +
95                                               ") or length (" + sourceLength +
96                                               ") for buffer of length " + source.length);
97        }
98        if (dest == null && destSize != 0) {
99            throw new IllegalArgumentException("null dest requires destSize == 0");
100        }
101        if ((destSize != 0) &&
102            (destStart < 0 || destSize < 0 || destStart + destSize > dest.length)) {
103            throw new IllegalArgumentException("bad dest start (" + destStart +
104                                               ") or size (" + destSize +
105                                               ") for buffer of length " + dest.length);
106        }
107        /* Validate input options */
108        if ( ((options&TASHKEEL_MASK) != 0) &&
109             !(((options & TASHKEEL_MASK)==TASHKEEL_BEGIN)  ||
110               ((options & TASHKEEL_MASK)==TASHKEEL_END)    ||
111               ((options & TASHKEEL_MASK)==TASHKEEL_RESIZE) ||
112               ((options & TASHKEEL_MASK)==TASHKEEL_REPLACE_BY_TATWEEL))) {
113            throw new IllegalArgumentException("Wrong Tashkeel argument");
114        }
115
116       ///CLOVER:OFF
117       //According to Steven Loomis, the code is unreachable when you OR all the constants within the if statements
118       if(((options&LAMALEF_MASK) != 0) &&
119              !(((options & LAMALEF_MASK)==LAMALEF_BEGIN)  ||
120                ((options & LAMALEF_MASK)==LAMALEF_END)    ||
121                ((options & LAMALEF_MASK)==LAMALEF_RESIZE) ||
122                ((options & LAMALEF_MASK)==LAMALEF_AUTO)   ||
123                ((options & LAMALEF_MASK)==LAMALEF_NEAR))) {
124           throw new IllegalArgumentException("Wrong Lam Alef argument");
125       }
126       ///CLOVER:ON
127
128       /* Validate Tashkeel (Tashkeel replacement options should be enabled in shaping mode only)*/
129       if(((options&TASHKEEL_MASK) != 0) && (options&LETTERS_MASK) == LETTERS_UNSHAPE) {
130            throw new IllegalArgumentException("Tashkeel replacement should not be enabled in deshaping mode ");
131       }
132       return internalShape(source, sourceStart, sourceLength, dest, destStart, destSize);
133    }
134
135    /**
136     * Convert a range of text in place.  This may only be used if the Length option
137     * does not grow or shrink the text.
138     *
139     * @param source An array containing the input text
140     * @param start The start of the range of text to convert
141     * @param length The length of the range of text to convert
142     * @throws ArabicShapingException if the text cannot be converted according to the options.
143     */
144    public void shape(char[] source, int start, int length) throws ArabicShapingException {
145        if ((options & LAMALEF_MASK) == LAMALEF_RESIZE) {
146            throw new ArabicShapingException("Cannot shape in place with length option resize.");
147        }
148        shape(source, start, length, source, start, length);
149    }
150
151    /**
152     * Convert a string, returning the new string.
153     *
154     * @param text the string to convert
155     * @return the converted string
156     * @throws ArabicShapingException if the string cannot be converted according to the options.
157     */
158    public String shape(String text) throws ArabicShapingException {
159        char[] src = text.toCharArray();
160        char[] dest = src;
161        if (((options & LAMALEF_MASK) == LAMALEF_RESIZE) &&
162            ((options & LETTERS_MASK) == LETTERS_UNSHAPE)) {
163
164            dest = new char[src.length * 2]; // max
165        }
166        int len = shape(src, 0, src.length, dest, 0, dest.length);
167
168        return new String(dest, 0, len);
169    }
170
171    /**
172     * Construct ArabicShaping using the options flags.
173     * The flags are as follows:<br>
174     * 'LENGTH' flags control whether the text can change size, and if not,
175     * how to maintain the size of the text when LamAlef ligatures are
176     * formed or broken.<br>
177     * 'TEXT_DIRECTION' flags control whether the text is read and written
178     * in visual order or in logical order.<br>
179     * 'LETTERS_SHAPE' flags control whether conversion is to or from
180     * presentation forms.<br>
181     * 'DIGITS' flags control whether digits are shaped, and whether from
182     * European to Arabic-Indic or vice-versa.<br>
183     * 'DIGIT_TYPE' flags control whether standard or extended Arabic-Indic
184     * digits are used when performing digit conversion.
185     */
186    public ArabicShaping(int options) {
187        this.options = options;
188        if ((options & DIGITS_MASK) > 0x80) {
189            throw new IllegalArgumentException("bad DIGITS options");
190        }
191
192        isLogical = ( (options & TEXT_DIRECTION_MASK) == TEXT_DIRECTION_LOGICAL );
193        /* Validate options */
194        spacesRelativeToTextBeginEnd = ( (options & SPACES_RELATIVE_TO_TEXT_MASK) == SPACES_RELATIVE_TO_TEXT_BEGIN_END );
195        if ( (options&SHAPE_TAIL_TYPE_MASK) == SHAPE_TAIL_NEW_UNICODE){
196            tailChar = NEW_TAIL_CHAR;
197        } else {
198            tailChar = OLD_TAIL_CHAR;
199        }
200    }
201
202    /* Seen Tail options */
    /**
     * Memory option: the result must have the same length as the source.
     * Shaping mode: The SEEN family character will expand into two characters using the space
     *               near the SEEN family character (i.e. the space after the character).
     *               If no space is found, an ArabicShapingException will be thrown.
     *
     * De-shaping mode: Any Seen character followed by a Tail character will be
     *                  replaced by a one-cell Seen, and a space will replace the Tail.
     * Affects: Seen options
     */
    public static final int SEEN_TWOCELL_NEAR = 0x200000;

    /** Bit mask for Seen memory options.
     */
    public static final int SEEN_MASK = 0x700000;
218
219    /* YehHamza options */
    /**
     * Memory option: the result must have the same length as the source.
     * Shaping mode: The YEHHAMZA character will expand into two characters using the space
     *               near it (i.e. the space after the character).
     *               If no space is found, an ArabicShapingException will be thrown.
     *
     * De-shaping mode: Any Yeh (final or isolated) character followed by a Hamza character will be
     *                  replaced by a one-cell YehHamza, and a space will replace the Hamza.
     * Affects: YehHamza options
     */
    public static final int YEHHAMZA_TWOCELL_NEAR  = 0x1000000;


    /** Bit mask for YehHamza memory options.
     */
    public static final int YEHHAMZA_MASK = 0x3800000;
236
237    /* New Tashkeel options */
    /**
     * Memory option: the result must have the same length as the source.
     * Shaping mode: Tashkeel characters will be replaced by spaces.
     *               Spaces will be placed at the beginning of the buffer.
     *
     * De-shaping mode: N/A
     * Affects: Tashkeel options
     */
    public static final int TASHKEEL_BEGIN = 0x40000;

    /**
     * Memory option: the result must have the same length as the source.
     * Shaping mode: Tashkeel characters will be replaced by spaces.
     *               Spaces will be placed at the end of the buffer.
     *
     * De-shaping mode: N/A
     * Affects: Tashkeel options
     */
    public static final int TASHKEEL_END = 0x60000;

    /**
     * Memory option: allow the result to have a different length than the source.
     * Shaping mode: Tashkeel characters will be removed, buffer length will shrink.
     * De-shaping mode: N/A
     *
     * Affects: Tashkeel options
     */
    public static final int TASHKEEL_RESIZE = 0x80000;

    /**
     * Memory option: the result must have the same length as the source.
     * Shaping mode: Tashkeel characters will be replaced by Tatweel if it is connected to adjacent
     *               characters (i.e. shaped on Tatweel) or replaced by space if it is not connected.
     *
     * De-shaping mode: N/A
     * Affects: Tashkeel options
     */
    public static final int TASHKEEL_REPLACE_BY_TATWEEL = 0xC0000;

    /** Bit mask for Tashkeel replacement with Space or Tatweel memory options.
     */
    public static final int TASHKEEL_MASK  = 0xE0000;
280
281    /* Space location Control options */
    /**
     * This option affects the meaning of the BEGIN and END options. If this option is not used,
     * the default for BEGIN and END is as follows:
     * The Default (for Visual LTR, Visual RTL and Logical Text alike)
     *           1. BEGIN always refers to the start address of physical memory.
     *           2. END always refers to the end address of physical memory.
     *
     * If this option is used it will swap the meaning of BEGIN and END only for Visual LTR text.
     *
     * The effect on the BEGIN and END Memory Options will be as follows:
     *    A. BEGIN For Visual LTR text: This will be the beginning (right side) of the visual text
     *       (corresponding to the physical memory address end, same as END in default behavior)
     *    B. BEGIN For Logical text: Same as BEGIN in default behavior.
     *    C. END For Visual LTR text: This will be the end (left side) of the visual text. (corresponding to
     *      the physical memory address beginning, same as BEGIN in default behavior)
     *    D. END For Logical text: Same as END in default behavior.
     * Affects: All LamAlef BEGIN, END and AUTO options.
     */
    public static final int SPACES_RELATIVE_TO_TEXT_BEGIN_END = 0x4000000;

    /** Bit mask for swapping BEGIN and END for Visual LTR text
     */
    public static final int SPACES_RELATIVE_TO_TEXT_MASK = 0x4000000;
305
    /**
     * If this option is used, shaping will use the new Unicode code point for TAIL (i.e. 0xFE73).
     * If this option is not specified (default), the old unofficial Unicode TAIL code point is used
     * (i.e. 0x200B).
     * De-shaping does not use this option: it always searches for both the new Unicode code point
     * for the TAIL (i.e. 0xFE73) and the old unofficial Unicode TAIL code point (i.e. 0x200B) and
     * de-shapes the Seen-Family letter accordingly.
     *
     * Shaping Mode: Only shaping.
     * De-shaping Mode: N/A.
     * Affects: All Seen options
     */
    public static final int SHAPE_TAIL_NEW_UNICODE = 0x8000000;

    /** Bit mask for new Unicode Tail option
     */
    public static final int SHAPE_TAIL_TYPE_MASK = 0x8000000;
322
    /**
     * Memory option: allow the result to have a different length than the source.
     */
    public static final int LENGTH_GROW_SHRINK = 0;

    /**
     * Memory option: allow the result to have a different length than the source.
     * Affects: LamAlef options
     * This option is an alias to LENGTH_GROW_SHRINK
     */
    public static final int LAMALEF_RESIZE   = 0;

    /**
     * Memory option: the result must have the same length as the source.
     * If more room is necessary, then try to consume spaces next to modified characters.
     */
    public static final int LENGTH_FIXED_SPACES_NEAR = 1;

    /**
     * Memory option: the result must have the same length as the source.
     * If more room is necessary, then try to consume spaces next to modified characters.
     * Affects: LamAlef options
     * This option is an alias to LENGTH_FIXED_SPACES_NEAR
     */
    public static final int LAMALEF_NEAR = 1 ;

    /**
     * Memory option: the result must have the same length as the source.
     * If more room is necessary, then try to consume spaces at the end of the text.
     */
    public static final int LENGTH_FIXED_SPACES_AT_END = 2;


    /**
     * Memory option: the result must have the same length as the source.
     * If more room is necessary, then try to consume spaces at the end of the text.
     * Affects: LamAlef options
     * This option is an alias to LENGTH_FIXED_SPACES_AT_END
     */
    public static final int LAMALEF_END = 2;

    /**
     * Memory option: the result must have the same length as the source.
     * If more room is necessary, then try to consume spaces at the beginning of the text.
     */
    public static final int LENGTH_FIXED_SPACES_AT_BEGINNING = 3;

    /**
     * Memory option: the result must have the same length as the source.
     * If more room is necessary, then try to consume spaces at the beginning of the text.
     * Affects: LamAlef options
     * This option is an alias to LENGTH_FIXED_SPACES_AT_BEGINNING
     */
    public static final int LAMALEF_BEGIN = 3;

    /**
     * Memory option: the result must have the same length as the source.
     * Shaping Mode: For each LAMALEF character found, expand LAMALEF using a space at the end.
     *               If there is no space at the end, use spaces at the beginning of the buffer.
     *               If there is no space at the beginning of the buffer, use the near space
     *               (i.e. the space after the LAMALEF character).
     *
     * Deshaping Mode: Performs the same function as when the flag equals LAMALEF_END.
     * Affects: LamAlef options
     */
    public static final int LAMALEF_AUTO  = 0x10000;

    /**
     * Bit mask for memory options. Has the same value as LAMALEF_MASK.
     */
    public static final int LENGTH_MASK = 0x10003;

    /** Bit mask for LamAlef memory options. Has the same value as LENGTH_MASK.
     */

    public static final int LAMALEF_MASK  = 0x10003;
399
    /**
     * Direction indicator: the source is in logical (keyboard) order.
     */
    public static final int TEXT_DIRECTION_LOGICAL = 0;

    /**
     * Direction indicator: the source is in visual RTL order,
     * the rightmost displayed character stored first.
     * This option is an alias to TEXT_DIRECTION_LOGICAL
     */
    public static final int TEXT_DIRECTION_VISUAL_RTL = 0;

    /**
     * Direction indicator: the source is in visual (display) order, that is,
     * the leftmost displayed character is stored first.
     */
    public static final int TEXT_DIRECTION_VISUAL_LTR = 4;

    /**
     * Bit mask for direction indicators.
     */
    public static final int TEXT_DIRECTION_MASK = 4;
422
423
    /**
     * Letter shaping option: do not perform letter shaping.
     */
    public static final int LETTERS_NOOP = 0;

    /**
     * Letter shaping option: replace normative letter characters in the U+0600 (Arabic) block
     * by shaped ones in the U+FE70 (Presentation Forms B) block. Performs Lam-Alef ligature
     * substitution.
     */
    public static final int LETTERS_SHAPE = 8;

    /**
     * Letter shaping option: replace shaped letter characters in the U+FE70 (Presentation Forms B) block
     * by normative ones in the U+0600 (Arabic) block.  Converts Lam-Alef ligatures to pairs of Lam and
     * Alef characters, consuming spaces if required.
     */
    public static final int LETTERS_UNSHAPE = 0x10;

    /**
     * Letter shaping option: replace normative letter characters in the U+0600 (Arabic) block,
     * except for the TASHKEEL characters at U+064B...U+0652, by shaped ones in the U+FE70
     * (Presentation Forms B) block.  The TASHKEEL characters will always be converted to
     * the isolated forms rather than to their correct shape.
     */
    public static final int LETTERS_SHAPE_TASHKEEL_ISOLATED = 0x18;

    /**
     * Bit mask for letter shaping options.
     */
    public static final int LETTERS_MASK = 0x18;
455
456
    /**
     * Digit shaping option: do not perform digit shaping.
     */
    public static final int DIGITS_NOOP = 0;

    /**
     * Digit shaping option: Replace European digits (U+0030...U+0039) by Arabic-Indic digits.
     */
    public static final int DIGITS_EN2AN = 0x20;

    /**
     * Digit shaping option: Replace Arabic-Indic digits by European digits (U+0030...U+0039).
     */
    public static final int DIGITS_AN2EN = 0x40;

    /**
     * Digit shaping option:
     * Replace European digits (U+0030...U+0039) by Arabic-Indic digits
     * if the most recent strongly directional character
     * is an Arabic letter (its Bidi direction value is RIGHT_TO_LEFT_ARABIC).
     * The initial state at the start of the text is assumed to be not an Arabic
     * letter, so European digits at the start of the text will not change.
     * Compare to DIGITS_EN2AN_INIT_AL.
     */
    public static final int DIGITS_EN2AN_INIT_LR = 0x60;

    /**
     * Digit shaping option:
     * Replace European digits (U+0030...U+0039) by Arabic-Indic digits
     * if the most recent strongly directional character
     * is an Arabic letter (its Bidi direction value is RIGHT_TO_LEFT_ARABIC).
     * The initial state at the start of the text is assumed to be an Arabic
     * letter, so European digits at the start of the text will change.
     * Compare to DIGITS_EN2AN_INIT_LR.
     */
    public static final int DIGITS_EN2AN_INIT_AL = 0x80;

    /* Not a valid option value; the constructor rejects any digit option above 0x80. */
    //private static final int DIGITS_RESERVED = 0xa0;

    /**
     * Bit mask for digit shaping options.
     */
    public static final int DIGITS_MASK = 0xe0;
501
    /**
     * Digit type option: Use Arabic-Indic digits (U+0660...U+0669).
     */
    public static final int DIGIT_TYPE_AN = 0;

    /**
     * Digit type option: Use Eastern (Extended) Arabic-Indic digits (U+06f0...U+06f9).
     */
    public static final int DIGIT_TYPE_AN_EXTENDED = 0x100;

    /**
     * Bit mask for digit type options. Only the single bit 0x100 is covered.
     */
    public static final int DIGIT_TYPE_MASK = 0x0100; // 0x3f00?
516
    /*
     * Private character and mode constants. The "06" names hold code points in the
     * U+0600 block and the "FE" names hold code points in the U+FE70 block.
     * NOTE(review): most consumers of these constants are outside this excerpt.
     */
    private static final char HAMZAFE_CHAR       = '\ufe80';
    private static final char HAMZA06_CHAR       = '\u0621';
    private static final char YEH_HAMZA_CHAR     = '\u0626';
    private static final char YEH_HAMZAFE_CHAR   = '\uFE89';
    // Both *_SPACE_SUB values are taken from the top of the BMP (U+FFFF / U+FFFE);
    // presumably temporary placeholders during shaping -- confirm against their users.
    private static final char LAMALEF_SPACE_SUB  = '\uffff';
    private static final char TASHKEEL_SPACE_SUB = '\ufffe';
    private static final char LAM_CHAR      = '\u0644';
    private static final char SPACE_CHAR    = '\u0020';
    private static final char SHADDA_CHAR   = '\uFE7C';
    private static final char SHADDA06_CHAR = '\u0651';
    private static final char TATWEEL_CHAR  = '\u0640';
    private static final char SHADDA_TATWEEL_CHAR = '\uFE7D';
    // The constructor picks one of these two as tailChar, depending on SHAPE_TAIL_NEW_UNICODE.
    private static final char NEW_TAIL_CHAR = '\uFE73';
    private static final char OLD_TAIL_CHAR = '\u200B';
    // Mode selectors; NOTE(review): their users are not visible in this excerpt.
    private static final int SHAPE_MODE      = 0;
    private static final int DESHAPE_MODE    = 1;
536
537    /**
538     */
539    @Override
540    public boolean equals(Object rhs) {
541        return rhs != null &&
542            rhs.getClass() == ArabicShaping.class &&
543            options == ((ArabicShaping)rhs).options;
544    }
545
546    /**
547     */
548     ///CLOVER:OFF
549    @Override
550    public int hashCode() {
551        return options;
552    }
553
554    /**
555     */
556    @Override
557    public String toString() {
558        StringBuilder buf = new StringBuilder(super.toString());
559        buf.append('[');
560
561        switch (options & LAMALEF_MASK) {
562        case LAMALEF_RESIZE: buf.append("LamAlef resize"); break;
563        case LAMALEF_NEAR: buf.append("LamAlef spaces at near"); break;
564        case LAMALEF_BEGIN: buf.append("LamAlef spaces at begin"); break;
565        case LAMALEF_END: buf.append("LamAlef spaces at end"); break;
566        case LAMALEF_AUTO: buf.append("lamAlef auto"); break;
567        }
568        switch (options & TEXT_DIRECTION_MASK) {
569        case TEXT_DIRECTION_LOGICAL: buf.append(", logical"); break;
570        case TEXT_DIRECTION_VISUAL_LTR: buf.append(", visual"); break;
571        }
572        switch (options & LETTERS_MASK) {
573        case LETTERS_NOOP: buf.append(", no letter shaping"); break;
574        case LETTERS_SHAPE: buf.append(", shape letters"); break;
575        case LETTERS_SHAPE_TASHKEEL_ISOLATED: buf.append(", shape letters tashkeel isolated"); break;
576        case LETTERS_UNSHAPE: buf.append(", unshape letters"); break;
577        }
578        switch (options & SEEN_MASK) {
579        case SEEN_TWOCELL_NEAR: buf.append(", Seen at near"); break;
580        }
581        switch (options & YEHHAMZA_MASK) {
582        case YEHHAMZA_TWOCELL_NEAR: buf.append(", Yeh Hamza at near"); break;
583        }
584        switch (options & TASHKEEL_MASK) {
585        case TASHKEEL_BEGIN: buf.append(", Tashkeel at begin"); break;
586        case TASHKEEL_END: buf.append(", Tashkeel at end"); break;
587        case TASHKEEL_REPLACE_BY_TATWEEL: buf.append(", Tashkeel replace with tatweel"); break;
588        case TASHKEEL_RESIZE: buf.append(", Tashkeel resize"); break;
589        }
590
591        switch (options & DIGITS_MASK) {
592        case DIGITS_NOOP: buf.append(", no digit shaping"); break;
593        case DIGITS_EN2AN: buf.append(", shape digits to AN"); break;
594        case DIGITS_AN2EN: buf.append(", shape digits to EN"); break;
595        case DIGITS_EN2AN_INIT_LR: buf.append(", shape digits to AN contextually: default EN"); break;
596        case DIGITS_EN2AN_INIT_AL: buf.append(", shape digits to AN contextually: default AL"); break;
597        }
598        switch (options & DIGIT_TYPE_MASK) {
599        case DIGIT_TYPE_AN: buf.append(", standard Arabic-Indic digits"); break;
600        case DIGIT_TYPE_AN_EXTENDED: buf.append(", extended Arabic-Indic digits"); break;
601        }
602        buf.append("]");
603
604        return buf.toString();
605    }
606    ///CLOVER:ON
607
608    //
609    // ported api
610    //
611
    // Bit flags that appear in the low byte of the araLink table entries:
    // 4 on the 0x064B-0x0652 (tashkeel) entries, 16 on the 0x0644 entry,
    // 32 on the 0x0622/0x0623/0x0625/0x0627 entries.
    private static final int IRRELEVANT = 4;
    private static final int LAMTYPE = 16;
    private static final int ALEFTYPE = 32;

    // Link flags (the "1" and "2" terms in araLink entries); LINK_MASK == LINKR | LINKL.
    // NOTE(review): the R/L naming suggests right/left linking -- confirm against the shaping code.
    private static final int LINKR = 1;
    private static final int LINKL = 2;
    private static final int LINK_MASK = 3;
619
    // Eight even offsets 0x0..0xE.
    // NOTE(review): the consumer is outside this excerpt; presumably offsets of the
    // "irrelevant" (tashkeel) presentation forms relative to a base code point -- confirm.
    private static final int irrelevantPos[] = {
        0x0, 0x2, 0x4, 0x6, 0x8, 0xA, 0xC, 0xE
    };
623
624/*
625    private static final char convertLamAlef[] =  {
626        '\u0622', // FEF5
627        '\u0622', // FEF6
628        '\u0623', // FEF7
629        '\u0623', // FEF8
630        '\u0625', // FEF9
631        '\u0625', // FEFA
632        '\u0627', // FEFB
633        '\u0627'  // FEFC
634    };
635*/
636
    // One 0/1 flag per presentation form U+FEB1..U+FEBE, as labelled on each entry.
    // NOTE(review): going by the name, 1 marks the isolated/final forms of the
    // tail (Seen) family; the lookup code is outside this excerpt -- confirm.
    private static final int tailFamilyIsolatedFinal[] = {
        /* FEB1 */ 1,
        /* FEB2 */ 1,
        /* FEB3 */ 0,
        /* FEB4 */ 0,
        /* FEB5 */ 1,
        /* FEB6 */ 1,
        /* FEB7 */ 0,
        /* FEB8 */ 0,
        /* FEB9 */ 1,
        /* FEBA */ 1,
        /* FEBB */ 0,
        /* FEBC */ 0,
        /* FEBD */ 1,
        /* FEBE */ 1
    };
653
    // One 0/1 flag per presentation form U+FE70..U+FE7F, as labelled on each entry.
    // NOTE(review): going by the name, 1 marks the medial tashkeel forms;
    // the lookup code is outside this excerpt -- confirm.
    private static final int tashkeelMedial[] = {
        /* FE70 */ 0,
        /* FE71 */ 1,
        /* FE72 */ 0,
        /* FE73 */ 0,
        /* FE74 */ 0,
        /* FE75 */ 0,
        /* FE76 */ 0,
        /* FE77 */ 1,
        /* FE78 */ 0,
        /* FE79 */ 1,
        /* FE7A */ 0,
        /* FE7B */ 1,
        /* FE7C */ 0,
        /* FE7D */ 1,
        /* FE7E */ 0,
        /* FE7F */ 1
    };
672
    // Two-entry table: index 0 is the isolated form, index 1 the final form (see entry labels).
    // NOTE(review): presumably the Yeh replacing a YehHamza when it is expanded into
    // Yeh + Hamza (see YEHHAMZA_TWOCELL_NEAR) -- the user is outside this excerpt.
    private static final char yehHamzaToYeh[] =
    {
    /* isolated*/ 0xFEEF,
    /* final   */ 0xFEF0
    };
678
    // Four-entry table; each entry is labelled with a code point in 0x065C..0x065F.
    // NOTE(review): presumably maps those internal LamAlef stand-ins back to the
    // Alef variant they contain -- the lookup code is outside this excerpt.
    private static final char convertNormalizedLamAlef[] = {
        '\u0622', // 065C
        '\u0623', // 065D
        '\u0625', // 065E
        '\u0627', // 065F
    };
685
686    private static final int[] araLink = {
687        1           + 32 + 256 * 0x11,  /*0x0622*/
688        1           + 32 + 256 * 0x13,  /*0x0623*/
689        1                + 256 * 0x15,  /*0x0624*/
690        1           + 32 + 256 * 0x17,  /*0x0625*/
691        1 + 2            + 256 * 0x19,  /*0x0626*/
692        1           + 32 + 256 * 0x1D,  /*0x0627*/
693        1 + 2            + 256 * 0x1F,  /*0x0628*/
694        1                + 256 * 0x23,  /*0x0629*/
695        1 + 2            + 256 * 0x25,  /*0x062A*/
696        1 + 2            + 256 * 0x29,  /*0x062B*/
697        1 + 2            + 256 * 0x2D,  /*0x062C*/
698        1 + 2            + 256 * 0x31,  /*0x062D*/
699        1 + 2            + 256 * 0x35,  /*0x062E*/
700        1                + 256 * 0x39,  /*0x062F*/
701        1                + 256 * 0x3B,  /*0x0630*/
702        1                + 256 * 0x3D,  /*0x0631*/
703        1                + 256 * 0x3F,  /*0x0632*/
704        1 + 2            + 256 * 0x41,  /*0x0633*/
705        1 + 2            + 256 * 0x45,  /*0x0634*/
706        1 + 2            + 256 * 0x49,  /*0x0635*/
707        1 + 2            + 256 * 0x4D,  /*0x0636*/
708        1 + 2            + 256 * 0x51,  /*0x0637*/
709        1 + 2            + 256 * 0x55,  /*0x0638*/
710        1 + 2            + 256 * 0x59,  /*0x0639*/
711        1 + 2            + 256 * 0x5D,  /*0x063A*/
712        0, 0, 0, 0, 0,                  /*0x063B-0x063F*/
713        1 + 2,                          /*0x0640*/
714        1 + 2            + 256 * 0x61,  /*0x0641*/
715        1 + 2            + 256 * 0x65,  /*0x0642*/
716        1 + 2            + 256 * 0x69,  /*0x0643*/
717        1 + 2       + 16 + 256 * 0x6D,  /*0x0644*/
718        1 + 2            + 256 * 0x71,  /*0x0645*/
719        1 + 2            + 256 * 0x75,  /*0x0646*/
720        1 + 2            + 256 * 0x79,  /*0x0647*/
721        1                + 256 * 0x7D,  /*0x0648*/
722        1                + 256 * 0x7F,  /*0x0649*/
723        1 + 2            + 256 * 0x81,  /*0x064A*/
724        4, 4, 4, 4,                     /*0x064B-0x064E*/
725        4, 4, 4, 4,                     /*0x064F-0x0652*/
726        4, 4, 4, 0, 0,                  /*0x0653-0x0657*/
727        0, 0, 0, 0,                     /*0x0658-0x065B*/
728        1                + 256 * 0x85,  /*0x065C*/
729        1                + 256 * 0x87,  /*0x065D*/
730        1                + 256 * 0x89,  /*0x065E*/
731        1                + 256 * 0x8B,  /*0x065F*/
732        0, 0, 0, 0, 0,                  /*0x0660-0x0664*/
733        0, 0, 0, 0, 0,                  /*0x0665-0x0669*/
734        0, 0, 0, 0, 0, 0,               /*0x066A-0x066F*/
735        4,                              /*0x0670*/
736        0,                              /*0x0671*/
737        1           + 32,               /*0x0672*/
738        1           + 32,               /*0x0673*/
739        0,                              /*0x0674*/
740        1           + 32,               /*0x0675*/
741        1, 1,                           /*0x0676-0x0677*/
742        1+2, 1+2, 1+2, 1+2, 1+2, 1+2,   /*0x0678-0x067D*/
743        1+2, 1+2, 1+2, 1+2, 1+2, 1+2,   /*0x067E-0x0683*/
744        1+2, 1+2, 1+2, 1+2,             /*0x0684-0x0687*/
745        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,   /*0x0688-0x0691*/
746        1, 1, 1, 1, 1, 1, 1, 1,         /*0x0692-0x0699*/
747        1+2, 1+2, 1+2, 1+2, 1+2, 1+2,   /*0x069A-0x06A3*/
748        1+2, 1+2, 1+2, 1+2,             /*0x069A-0x06A3*/
749        1+2, 1+2, 1+2, 1+2, 1+2, 1+2,   /*0x06A4-0x06AD*/
750        1+2, 1+2, 1+2, 1+2,             /*0x06A4-0x06AD*/
751        1+2, 1+2, 1+2, 1+2, 1+2, 1+2,   /*0x06AE-0x06B7*/
752        1+2, 1+2, 1+2, 1+2,             /*0x06AE-0x06B7*/
753        1+2, 1+2, 1+2, 1+2, 1+2, 1+2,   /*0x06B8-0x06BF*/
754        1+2, 1+2,                       /*0x06B8-0x06BF*/
755        1,                              /*0x06C0*/
756        1+2,                            /*0x06C1*/
757        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,   /*0x06C2-0x06CB*/
758        1+2,                            /*0x06CC*/
759        1,                              /*0x06CD*/
760        1+2, 1+2, 1+2, 1+2,             /*0x06CE-0x06D1*/
761        1, 1                            /*0x06D2-0x06D3*/
762    };
763
    /*
     * Link values for the presentation-form characters 0xFE70..0xFEFC.
     * getLink() reads this table at index (ch - 0xFE70), so entry i
     * describes code point 0xFE70 + i; the range comments below follow
     * that indexing.
     */
    private static final int[] presLink = {
        1 + 2,                        /*0xFE70*/
        1 + 2,                        /*0xFE71*/
        1 + 2, 0, 1+ 2, 0, 1+ 2,      /*0xFE72-0xFE76*/
        1 + 2,                        /*0xFE77*/
        1+ 2, 1 + 2, 1+2, 1 + 2,      /*0xFE78-0xFE7B*/
        1+ 2, 1 + 2, 1+2, 1 + 2,      /*0xFE7C-0xFE7F*/
        0, 0 + 32, 1 + 32, 0 + 32,    /*0xFE80-0xFE83*/
        1 + 32, 0, 1,  0 + 32,        /*0xFE84-0xFE87*/
        1 + 32, 0, 2,  1 + 2,         /*0xFE88-0xFE8B*/
        1, 0 + 32, 1 + 32, 0,         /*0xFE8C-0xFE8F*/
        2, 1 + 2, 1, 0,               /*0xFE90-0xFE93*/
        1, 0, 2, 1 + 2,               /*0xFE94-0xFE97*/
        1, 0, 2, 1 + 2,               /*0xFE98-0xFE9B*/
        1, 0, 2, 1 + 2,               /*0xFE9C-0xFE9F*/
        1, 0, 2, 1 + 2,               /*0xFEA0-0xFEA3*/
        1, 0, 2, 1 + 2,               /*0xFEA4-0xFEA7*/
        1, 0, 1, 0,                   /*0xFEA8-0xFEAB*/
        1, 0, 1, 0,                   /*0xFEAC-0xFEAF*/
        1, 0, 2, 1+2,                 /*0xFEB0-0xFEB3*/
        1, 0, 2, 1+2,                 /*0xFEB4-0xFEB7*/
        1, 0, 2, 1+2,                 /*0xFEB8-0xFEBB*/
        1, 0, 2, 1+2,                 /*0xFEBC-0xFEBF*/
        1, 0, 2, 1+2,                 /*0xFEC0-0xFEC3*/
        1, 0, 2, 1+2,                 /*0xFEC4-0xFEC7*/
        1, 0, 2, 1+2,                 /*0xFEC8-0xFECB*/
        1, 0, 2, 1+2,                 /*0xFECC-0xFECF*/
        1, 0, 2, 1+2,                 /*0xFED0-0xFED3*/
        1, 0, 2, 1+2,                 /*0xFED4-0xFED7*/
        1, 0, 2, 1+2,                 /*0xFED8-0xFEDB*/
        1, 0 + 16, 2 + 16, 1 + 2 +16, /*0xFEDC-0xFEDF*/
        1 + 16, 0, 2, 1+2,            /*0xFEE0-0xFEE3*/
        1, 0, 2, 1+2,                 /*0xFEE4-0xFEE7*/
        1, 0, 2, 1+2,                 /*0xFEE8-0xFEEB*/
        1, 0, 1, 0,                   /*0xFEEC-0xFEEF*/
        1, 0, 2, 1+2,                 /*0xFEF0-0xFEF3*/
        1, 0, 1, 0,                   /*0xFEF4-0xFEF7*/
        1, 0, 1, 0,                   /*0xFEF8-0xFEFB*/
        1                             /*0xFEFC*/
    };
804
805    private static int[] convertFEto06 = {
806        /***********0******1******2******3******4******5******6******7******8******9******A******B******C******D******E******F***/
807        /*FE7*/   0x64B, 0x64B, 0x64C, 0x64C, 0x64D, 0x64D, 0x64E, 0x64E, 0x64F, 0x64F, 0x650, 0x650, 0x651, 0x651, 0x652, 0x652,
808        /*FE8*/   0x621, 0x622, 0x622, 0x623, 0x623, 0x624, 0x624, 0x625, 0x625, 0x626, 0x626, 0x626, 0x626, 0x627, 0x627, 0x628,
809        /*FE9*/   0x628, 0x628, 0x628, 0x629, 0x629, 0x62A, 0x62A, 0x62A, 0x62A, 0x62B, 0x62B, 0x62B, 0x62B, 0x62C, 0x62C, 0x62C,
810        /*FEA*/   0x62C, 0x62D, 0x62D, 0x62D, 0x62D, 0x62E, 0x62E, 0x62E, 0x62E, 0x62F, 0x62F, 0x630, 0x630, 0x631, 0x631, 0x632,
811        /*FEB*/   0x632, 0x633, 0x633, 0x633, 0x633, 0x634, 0x634, 0x634, 0x634, 0x635, 0x635, 0x635, 0x635, 0x636, 0x636, 0x636,
812        /*FEC*/   0x636, 0x637, 0x637, 0x637, 0x637, 0x638, 0x638, 0x638, 0x638, 0x639, 0x639, 0x639, 0x639, 0x63A, 0x63A, 0x63A,
813        /*FED*/   0x63A, 0x641, 0x641, 0x641, 0x641, 0x642, 0x642, 0x642, 0x642, 0x643, 0x643, 0x643, 0x643, 0x644, 0x644, 0x644,
814        /*FEE*/   0x644, 0x645, 0x645, 0x645, 0x645, 0x646, 0x646, 0x646, 0x646, 0x647, 0x647, 0x647, 0x647, 0x648, 0x648, 0x649,
815        /*FEF*/   0x649, 0x64A, 0x64A, 0x64A, 0x64A, 0x65C, 0x65C, 0x65D, 0x65D, 0x65E, 0x65E, 0x65F, 0x65F
816    };
817
818    private static final int shapeTable[][][] = {
819        { {0,0,0,0}, {0,0,0,0}, {0,1,0,3}, {0,1,0,1} },
820        { {0,0,2,2}, {0,0,1,2}, {0,1,1,2}, {0,1,1,3} },
821        { {0,0,0,0}, {0,0,0,0}, {0,1,0,3}, {0,1,0,3} },
822        { {0,0,1,2}, {0,0,1,2}, {0,1,1,2}, {0,1,1,3} }
823    };
824
825    /*
826     * This function shapes European digits to Arabic-Indic digits
827     * in-place, writing over the input characters.  Data is in visual
828     * order.
829     */
830    private void shapeToArabicDigitsWithContext(char[] dest,
831                                                int start,
832                                                int length,
833                                                char digitBase,
834                                                boolean lastStrongWasAL) {
835        UBiDiProps bdp=UBiDiProps.INSTANCE;
836        digitBase -= '0'; // move common adjustment out of loop
837
838        for(int i = start + length; --i >= start;) {
839            char ch = dest[i];
840            switch (bdp.getClass(ch)) {
841            case UCharacterDirection.LEFT_TO_RIGHT:
842            case UCharacterDirection.RIGHT_TO_LEFT:
843                lastStrongWasAL = false;
844                break;
845            case UCharacterDirection.RIGHT_TO_LEFT_ARABIC:
846                lastStrongWasAL = true;
847                break;
848            case UCharacterDirection.EUROPEAN_NUMBER:
849                if (lastStrongWasAL && ch <= '\u0039') {
850                    dest[i] = (char)(ch + digitBase);
851                }
852                break;
853            default:
854                break;
855            }
856        }
857    }
858
859    /*
860     * Name    : invertBuffer
861     * Function: This function inverts the buffer, it's used
862     *           in case the user specifies the buffer to be
863     *           TEXT_DIRECTION_LOGICAL
864     */
865    private static void invertBuffer(char[] buffer,
866                                     int start,
867                                     int length) {
868
869        for(int i = start, j = start + length - 1; i < j; i++, --j) {
870            char temp = buffer[i];
871            buffer[i] = buffer[j];
872            buffer[j] = temp;
873        }
874    }
875
876    /*
877     * Name    : changeLamAlef
878     * Function: Converts the Alef characters into an equivalent
879     *           LamAlef location in the 0x06xx Range, this is an
880     *           intermediate stage in the operation of the program
881     *           later it'll be converted into the 0xFExx LamAlefs
882     *           in the shaping function.
883     */
884    private static char changeLamAlef(char ch) {
885        switch(ch) {
886        case '\u0622': return '\u065C';
887        case '\u0623': return '\u065D';
888        case '\u0625': return '\u065E';
889        case '\u0627': return '\u065F';
890        default:  return '\u0000'; // not a lamalef
891        }
892    }
893
894    /*
895     * Name    : specialChar
896     * Function: Special Arabic characters need special handling in the shapeUnicode
897     *           function, this function returns 1 or 2 for these special characters
898     */
899    private static int specialChar(char ch) {
900        if ((ch > '\u0621' && ch < '\u0626') ||
901            (ch == '\u0627') ||
902            (ch > '\u062E' && ch < '\u0633') ||
903            (ch > '\u0647' && ch < '\u064A') ||
904            (ch == '\u0629')) {
905            return 1;
906        } else if (ch >= '\u064B' && ch<= '\u0652') {
907            return 2;
908        } else if (ch >= 0x0653 && ch <= 0x0655 ||
909                   ch == 0x0670 ||
910                   ch >= 0xFE70 && ch <= 0xFE7F) {
911            return 3;
912        } else {
913            return 0;
914        }
915    }
916
917    /*
918     * Name    : getLink
919     * Function: Resolves the link between the characters as
920     *           Arabic characters have four forms :
921     *           Isolated, Initial, Middle and Final Form
922     */
923    private static int getLink(char ch) {
924        if (ch >= '\u0622' && ch <= '\u06D3') {
925            return araLink[ch - '\u0622'];
926        } else if (ch == '\u200D') {
927            return 3;
928        } else if (ch >= '\u206D' && ch <= '\u206F') {
929            return 4;
930        } else if (ch >= '\uFE70' && ch <= '\uFEFC') {
931            return presLink[ch - '\uFE70'];
932        } else {
933            return 0;
934        }
935    }
936
937    /*
938     * Name    : countSpaces
939     * Function: Counts the number of spaces
940     *           at each end of the logical buffer
941     */
942    private static int countSpacesLeft(char[] dest,
943                                       int start,
944                                       int count) {
945        for (int i = start, e = start + count; i < e; ++i) {
946            if (dest[i] != SPACE_CHAR) {
947                return i - start;
948            }
949        }
950        return count;
951    }
952
953    private static int countSpacesRight(char[] dest,
954                                        int start,
955                                        int count) {
956
957        for (int i = start + count; --i >= start;) {
958            if (dest[i] != SPACE_CHAR) {
959                return start + count - 1 - i;
960            }
961        }
962        return count;
963    }
964
965    /*
966     * Name    : isTashkeelChar
967     * Function: Returns true for Tashkeel characters else return false
968     */
969    private static boolean isTashkeelChar(char ch) {
970        return ( ch >='\u064B' && ch <= '\u0652' );
971    }
972
973    /*
974     *Name     : isSeenTailFamilyChar
975     *Function : returns 1 if the character is a seen family isolated character
976     *           in the FE range otherwise returns 0
977     */
978
979    private static int isSeenTailFamilyChar(char ch) {
980        if (ch >= 0xfeb1 && ch < 0xfebf){
981             return tailFamilyIsolatedFinal [ch - 0xFEB1];
982        } else {
983             return 0;
984        }
985    }
986
987     /* Name     : isSeenFamilyChar
988      * Function : returns 1 if the character is a seen family character in the Unicode
989      *            06 range otherwise returns 0
990     */
991
992    private static int isSeenFamilyChar(char  ch){
993        if (ch >= 0x633 && ch <= 0x636){
994            return 1;
995        }else {
996            return 0;
997        }
998    }
999
1000    /*
1001     *Name     : isTailChar
1002     *Function : returns true if the character matches one of the tail characters
1003     *           (0xfe73 or 0x200b) otherwise returns false
1004     */
1005
1006    private static boolean isTailChar(char ch) {
1007        if(ch == OLD_TAIL_CHAR || ch == NEW_TAIL_CHAR){
1008                return true;
1009        }else{
1010                return false;
1011        }
1012    }
1013
1014    /*
1015     *Name     : isAlefMaksouraChar
1016     *Function : returns true if the character is a Alef Maksoura Final or isolated
1017     *           otherwise returns false
1018     */
1019    private static boolean isAlefMaksouraChar(char ch) {
1020        return ( (ch == 0xFEEF) || ( ch == 0xFEF0) || (ch == 0x0649));
1021    }
1022
1023    /*
1024     * Name     : isYehHamzaChar
1025     * Function : returns true if the character is a yehHamza isolated or yehhamza
1026     *            final is found otherwise returns false
1027     */
1028    private static boolean isYehHamzaChar(char ch) {
1029        if((ch==0xFE89)||(ch==0xFE8A)){
1030            return true;
1031        }else{
1032            return false;
1033        }
1034    }
1035
1036    /*
1037     *Name     : isTashkeelCharFE
1038     *Function : Returns true for Tashkeel characters in FE range else return false
1039     */
1040
1041    private static boolean isTashkeelCharFE(char ch) {
1042        return ( ch!=0xFE75 &&(ch>=0xFE70 && ch<= 0xFE7F) );
1043    }
1044
1045    /*
1046     * Name: isTashkeelOnTatweelChar
1047     * Function: Checks if the Tashkeel Character is on Tatweel or not,if the
1048     *           Tashkeel on tatweel (FE range), it returns 1 else if the
1049     *           Tashkeel with shadda on tatweel (FC range)return 2 otherwise
1050     *           returns 0
1051     */
1052    private static int isTashkeelOnTatweelChar(char ch){
1053        if (ch >= 0xfe70 && ch <= 0xfe7f && ch != NEW_TAIL_CHAR && ch != 0xFE75 && ch != SHADDA_TATWEEL_CHAR)
1054        {
1055            return tashkeelMedial [ch - 0xFE70];
1056        } else if( (ch >= 0xfcf2 && ch <= 0xfcf4) || (ch == SHADDA_TATWEEL_CHAR)) {
1057            return 2;
1058        } else {
1059            return 0;
1060        }
1061    }
1062
1063    /*
1064     * Name: isIsolatedTashkeelChar
1065     * Function: Checks if the Tashkeel Character is in the isolated form
1066     *           (i.e. Unicode FE range) returns 1 else if the Tashkeel
1067     *           with shadda is in the isolated form (i.e. Unicode FC range)
1068     *           returns 1 otherwise returns 0
1069     */
1070    private static int isIsolatedTashkeelChar(char ch){
1071        if (ch >= 0xfe70 && ch <= 0xfe7f && ch != NEW_TAIL_CHAR && ch != 0xFE75){
1072            return (1 - tashkeelMedial [ch - 0xFE70]);
1073        } else if(ch >= 0xfc5e && ch <= 0xfc63){
1074            return 1;
1075        } else{
1076            return 0;
1077        }
1078    }
1079
1080    /*
1081     * Name    : isAlefChar
1082     * Function: Returns 1 for Alef characters else return 0
1083     */
1084    private static boolean isAlefChar(char ch) {
1085        return ch == '\u0622' || ch == '\u0623' || ch == '\u0625' || ch == '\u0627';
1086    }
1087
1088    /*
1089     * Name    : isLamAlefChar
1090     * Function: Returns true for LamAlef characters else return false
1091     */
1092    private static boolean isLamAlefChar(char ch) {
1093        return ch >= '\uFEF5' && ch <= '\uFEFC';
1094    }
1095
1096    private static boolean isNormalizedLamAlefChar(char ch) {
1097        return ch >= '\u065C' && ch <= '\u065F';
1098    }
1099
1100    /*
1101     * Name    : calculateSize
1102     * Function: This function calculates the destSize to be used in preflighting
1103     *           when the destSize is equal to 0
1104     */
1105    private int calculateSize(char[] source,
1106                              int sourceStart,
1107                              int sourceLength) {
1108
1109        int destSize = sourceLength;
1110
1111        switch (options & LETTERS_MASK) {
1112        case LETTERS_SHAPE:
1113        case LETTERS_SHAPE_TASHKEEL_ISOLATED:
1114            if (isLogical) {
1115                for (int i = sourceStart, e = sourceStart + sourceLength - 1; i < e; ++i) {
1116                    if ((source[i] == LAM_CHAR && isAlefChar(source[i+1])) || isTashkeelCharFE(source[i])){
1117                        --destSize;
1118                    }
1119                }
1120            } else { // visual
1121                for(int i = sourceStart + 1, e = sourceStart + sourceLength; i < e; ++i) {
1122                    if ((source[i] == LAM_CHAR && isAlefChar(source[i-1])) || isTashkeelCharFE(source[i])) {
1123                        --destSize;
1124                    }
1125                }
1126            }
1127            break;
1128
1129        case LETTERS_UNSHAPE:
1130            for(int i = sourceStart, e = sourceStart + sourceLength; i < e; ++i) {
1131                if (isLamAlefChar(source[i])) {
1132                    destSize++;
1133                }
1134            }
1135            break;
1136
1137        default:
1138            break;
1139        }
1140
1141        return destSize;
1142    }
1143
1144
1145    /*
1146     * Name    : countSpaceSub
1147     * Function: Counts number of times the subChar appears in the array
1148     */
1149    private static int countSpaceSub(char [] dest,int length, char subChar){
1150        int i = 0;
1151        int count = 0;
1152        while (i < length) {
1153          if (dest[i] == subChar) {
1154              count++;
1155              }
1156          i++;
1157        }
1158        return count;
1159    }
1160
1161    /*
1162     * Name    : shiftArray
1163     * Function: Shifts characters to replace space sub characters
1164     */
1165    private static void shiftArray(char [] dest,int start, int e, char subChar){
1166        int w = e;
1167        int r = e;
1168        while (--r >= start) {
1169          char ch = dest[r];
1170          if (ch != subChar) {
1171            --w;
1172            if (w != r) {
1173              dest[w] = ch;
1174            }
1175          }
1176        }
1177   }
1178
1179    /*
1180     * Name    : flipArray
1181     * Function: inverts array, so that start becomes end and vice versa
1182     */
1183      private static int flipArray(char [] dest, int start, int e, int w){
1184        int r;
1185        if (w > start) {
1186        // shift, assume small buffer size so don't use arraycopy
1187          r = w;
1188          w = start;
1189          while (r < e) {
1190            dest[w++] = dest[r++];
1191           }
1192         } else {
1193             w = e;
1194         }
1195        return w;
1196      }
1197
1198    /*
1199     * Name     : handleTashkeelWithTatweel
1200     * Function : Replaces Tashkeel as following:
1201     *            Case 1 :if the Tashkeel on tatweel, replace it with Tatweel.
1202     *            Case 2 :if the Tashkeel aggregated with Shadda on Tatweel, replace
1203     *                   it with Shadda on Tatweel.
1204     *            Case 3: if the Tashkeel is isolated replace it with Space.
1205     *
1206     */
1207    private static int handleTashkeelWithTatweel(char[] dest, int sourceLength) {
1208                     int i;
1209                     for(i = 0; i < sourceLength; i++){
1210                         if((isTashkeelOnTatweelChar(dest[i]) == 1)){
1211                             dest[i] = TATWEEL_CHAR;
1212                        }else if((isTashkeelOnTatweelChar(dest[i]) == 2)){
1213                             dest[i] = SHADDA_TATWEEL_CHAR;
1214                        }else if((isIsolatedTashkeelChar(dest[i])==1) && dest[i] != SHADDA_CHAR){
1215                             dest[i] = SPACE_CHAR;
1216                        }
1217                     }
1218                     return sourceLength;
1219    }
1220
1221    /*
1222     *Name     : handleGeneratedSpaces
1223     *Function : The shapeUnicode function converts Lam + Alef into LamAlef + space,
1224     *           and Tashkeel to space.
1225     *           handleGeneratedSpaces function puts these generated spaces
1226     *           according to the options the user specifies. LamAlef and Tashkeel
1227     *           spaces can be replaced at begin, at end, at near or decrease the
1228     *           buffer size.
1229     *
1230     *           There is also Auto option for LamAlef and tashkeel, which will put
1231     *           the spaces at end of the buffer (or end of text if the user used
1232     *           the option SPACES_RELATIVE_TO_TEXT_BEGIN_END).
1233     *
1234     *           If the text type was visual_LTR and the option
1235     *           SPACES_RELATIVE_TO_TEXT_BEGIN_END was selected the END
1236     *           option will place the space at the beginning of the buffer and
1237     *           BEGIN will place the space at the end of the buffer.
1238     */
1239  private int handleGeneratedSpaces(char[] dest,
1240            int start,
1241            int length) {
1242
1243      int lenOptionsLamAlef = options & LAMALEF_MASK;
1244      int lenOptionsTashkeel = options & TASHKEEL_MASK;
1245      boolean lamAlefOn = false;
1246      boolean tashkeelOn = false;
1247
1248      if (!isLogical & !spacesRelativeToTextBeginEnd) {
1249          switch (lenOptionsLamAlef) {
1250          case LAMALEF_BEGIN: lenOptionsLamAlef = LAMALEF_END; break;
1251          case LAMALEF_END: lenOptionsLamAlef = LAMALEF_BEGIN; break;
1252          default: break;
1253         }
1254          switch (lenOptionsTashkeel){
1255          case TASHKEEL_BEGIN: lenOptionsTashkeel = TASHKEEL_END; break;
1256          case TASHKEEL_END: lenOptionsTashkeel = TASHKEEL_BEGIN; break;
1257          default: break;
1258          }
1259        }
1260
1261
1262      if (lenOptionsLamAlef == LAMALEF_NEAR) {
1263          for (int i = start, e = i + length; i < e; ++i) {
1264              if (dest[i] == LAMALEF_SPACE_SUB) {
1265                  dest[i] = SPACE_CHAR;
1266              }
1267          }
1268
1269      } else {
1270
1271          final int e = start + length;
1272          int wL = countSpaceSub(dest, length, LAMALEF_SPACE_SUB);
1273          int wT = countSpaceSub(dest, length, TASHKEEL_SPACE_SUB);
1274
1275          if (lenOptionsLamAlef == LAMALEF_END){
1276            lamAlefOn = true;
1277          }
1278          if (lenOptionsTashkeel == TASHKEEL_END){
1279            tashkeelOn = true;
1280          }
1281
1282
1283          if (lamAlefOn && (lenOptionsLamAlef == LAMALEF_END)) {
1284            shiftArray(dest, start, e, LAMALEF_SPACE_SUB);
1285            while (wL > start) {
1286                dest[--wL] = SPACE_CHAR;
1287            }
1288          }
1289
1290          if (tashkeelOn && (lenOptionsTashkeel == TASHKEEL_END)){
1291            shiftArray(dest, start, e, TASHKEEL_SPACE_SUB);
1292            while (wT > start) {
1293                 dest[--wT] = SPACE_CHAR;
1294            }
1295          }
1296
1297          lamAlefOn = false;
1298          tashkeelOn = false;
1299
1300          if (lenOptionsLamAlef == LAMALEF_RESIZE){
1301            lamAlefOn = true;
1302          }
1303          if (lenOptionsTashkeel == TASHKEEL_RESIZE){
1304            tashkeelOn = true;
1305          }
1306
1307          if (lamAlefOn && (lenOptionsLamAlef == LAMALEF_RESIZE)){
1308              shiftArray(dest, start, e, LAMALEF_SPACE_SUB);
1309              wL = flipArray(dest,start,e, wL);
1310              length = wL - start;
1311          }
1312          if (tashkeelOn && (lenOptionsTashkeel == TASHKEEL_RESIZE)) {
1313              shiftArray(dest, start, e, TASHKEEL_SPACE_SUB);
1314              wT = flipArray(dest,start,e, wT);
1315              length = wT - start;
1316          }
1317
1318          lamAlefOn = false;
1319          tashkeelOn = false;
1320
1321          if ((lenOptionsLamAlef == LAMALEF_BEGIN) ||
1322              (lenOptionsLamAlef == LAMALEF_AUTO)){
1323                lamAlefOn = true;
1324          }
1325          if (lenOptionsTashkeel == TASHKEEL_BEGIN){
1326                tashkeelOn = true;
1327          }
1328
1329          if (lamAlefOn && ((lenOptionsLamAlef == LAMALEF_BEGIN)||
1330                            (lenOptionsLamAlef == LAMALEF_AUTO))) { // spaces at beginning
1331              shiftArray(dest, start, e, LAMALEF_SPACE_SUB);
1332               wL = flipArray(dest,start,e, wL);
1333                  while (wL < e) {
1334                      dest[wL++] = SPACE_CHAR;
1335                  }
1336              }
1337              if(tashkeelOn && (lenOptionsTashkeel == TASHKEEL_BEGIN)){
1338               shiftArray(dest, start, e, TASHKEEL_SPACE_SUB);
1339               wT = flipArray(dest,start,e, wT);
1340                  while (wT < e) {
1341                      dest[wT++] = SPACE_CHAR;
1342                  }
1343              }
1344           }
1345
1346      return length;
1347  }
1348
1349
1350  /*
1351   *Name     :expandCompositCharAtBegin
1352   *Function :Expands the LamAlef character to Lam and Alef consuming the required
1353   *         space from beginning of the buffer. If the text type was visual_LTR
1354   *         and the option SPACES_RELATIVE_TO_TEXT_BEGIN_END was selected
1355   *         the spaces will be located at end of buffer.
1356   *         If there are no spaces to expand the LamAlef, an exception is thrown.
1357*/
1358 private boolean expandCompositCharAtBegin(char[] dest,int start, int length,
1359                            int lacount) {
1360     boolean spaceNotFound = false;
1361
1362     if (lacount > countSpacesRight(dest, start, length)) {
1363         spaceNotFound = true;
1364         return spaceNotFound;
1365     }
1366     for (int r = start + length - lacount, w = start + length; --r >= start;) {
1367         char ch = dest[r];
1368         if (isNormalizedLamAlefChar(ch)) {
1369             dest[--w] = LAM_CHAR;
1370             dest[--w] = convertNormalizedLamAlef[ch - '\u065C'];
1371         } else {
1372             dest[--w] = ch;
1373         }
1374     }
1375     return spaceNotFound;
1376
1377  }
1378
1379  /*
1380   *Name     : expandCompositCharAtEnd
1381   *Function : Expands the LamAlef character to Lam and Alef consuming the
1382   *           required space from end of the buffer. If the text type was
1383   *           Visual LTR and the option SPACES_RELATIVE_TO_TEXT_BEGIN_END
1384   *           was used, the spaces will be consumed from begin of buffer. If
1385   *           there are no spaces to expand the LamAlef, an exception is thrown.
1386   */
1387
1388  private boolean  expandCompositCharAtEnd(char[] dest,int start, int length,
1389                          int lacount){
1390      boolean spaceNotFound = false;
1391
1392      if (lacount > countSpacesLeft(dest, start, length)) {
1393          spaceNotFound = true;
1394          return spaceNotFound;
1395      }
1396      for (int r = start + lacount, w = start, e = start + length; r < e; ++r) {
1397          char ch = dest[r];
1398          if (isNormalizedLamAlefChar(ch)) {
1399              dest[w++] = convertNormalizedLamAlef[ch - '\u065C'];
1400              dest[w++] = LAM_CHAR;
1401          } else {
1402              dest[w++] = ch;
1403          }
1404      }
1405      return spaceNotFound;
1406  }
1407
1408  /*
1409   *Name     : expandCompositCharAtNear
1410   *Function : Expands the LamAlef character into Lam + Alef, YehHamza character
1411   *           into Yeh + Hamza, SeenFamily character into SeenFamily character
1412   *           + Tail, while consuming the space next to the character.
1413   */
1414
1415  private boolean expandCompositCharAtNear(char[] dest,int start, int length,
1416                                       int yehHamzaOption, int seenTailOption, int lamAlefOption){
1417
1418      boolean spaceNotFound = false;
1419
1420
1421
1422      if (isNormalizedLamAlefChar(dest[start])) {
1423          spaceNotFound = true;
1424          return spaceNotFound;
1425      }
1426      for (int i = start + length; --i >=start;) {
1427          char ch = dest[i];
1428          if (lamAlefOption == 1 && isNormalizedLamAlefChar(ch)) {
1429              if (i>start &&dest[i-1] == SPACE_CHAR) {
1430                  dest[i] = LAM_CHAR;
1431                  dest[--i] = convertNormalizedLamAlef[ch - '\u065C'];
1432              } else {
1433                  spaceNotFound = true;
1434                  return spaceNotFound;
1435              }
1436          }else if(seenTailOption == 1 && isSeenTailFamilyChar(ch) == 1){
1437              if(i>start &&dest[i-1] == SPACE_CHAR){
1438                  dest[i-1] = tailChar;
1439              } else{
1440                  spaceNotFound = true;
1441                  return spaceNotFound;
1442              }
1443          }else if(yehHamzaOption == 1 && isYehHamzaChar(ch)){
1444
1445               if(i>start &&dest[i-1] == SPACE_CHAR){
1446                  dest[i] = yehHamzaToYeh[ch - YEH_HAMZAFE_CHAR];
1447                  dest[i-1] = HAMZAFE_CHAR;
1448              }else{
1449                  spaceNotFound = true;
1450                  return spaceNotFound;
1451                }
1452
1453
1454          }
1455      }
1456      return false;
1457
1458  }
1459
1460    /*
1461     * Name    : expandCompositChar
1462     * Function: LamAlef needs special handling as the LamAlef is
1463     *           one character while expanding it will give two
1464     *           characters Lam + Alef, so we need to expand the LamAlef
1465     *           in near or far spaces according to the options the user
1466     *           specifies or increase the buffer size.
1467     *           Dest has enough room for the expansion if we are growing.
1468     *           lamalef are normalized to the 'special characters'
1469     */
1470    private int expandCompositChar(char[] dest,
1471                              int start,
1472                              int length,
1473                              int lacount,
1474                              int shapingMode) throws ArabicShapingException {
1475
1476        int lenOptionsLamAlef = options & LAMALEF_MASK;
1477        int lenOptionsSeen = options & SEEN_MASK;
1478        int lenOptionsYehHamza = options & YEHHAMZA_MASK;
1479        boolean spaceNotFound = false;
1480
1481        if (!isLogical && !spacesRelativeToTextBeginEnd) {
1482            switch (lenOptionsLamAlef) {
1483            case LAMALEF_BEGIN: lenOptionsLamAlef = LAMALEF_END; break;
1484            case LAMALEF_END: lenOptionsLamAlef = LAMALEF_BEGIN; break;
1485            default: break;
1486            }
1487        }
1488
1489        if(shapingMode == 1){
1490            if(lenOptionsLamAlef == LAMALEF_AUTO){
1491                if(isLogical){
1492                    spaceNotFound = expandCompositCharAtEnd(dest, start, length, lacount);
1493                    if(spaceNotFound){
1494                        spaceNotFound = expandCompositCharAtBegin(dest, start, length, lacount);
1495                    }
1496                    if(spaceNotFound){
1497                        spaceNotFound = expandCompositCharAtNear(dest, start, length,0,0,1);
1498                    }
1499                    if(spaceNotFound){
1500                        throw new ArabicShapingException("No spacefor lamalef");
1501                    }
1502                }else{
1503                    spaceNotFound = expandCompositCharAtBegin(dest, start, length, lacount);
1504                    if(spaceNotFound){
1505                        spaceNotFound = expandCompositCharAtEnd(dest, start, length, lacount);
1506                    }
1507                    if(spaceNotFound){
1508                        spaceNotFound = expandCompositCharAtNear(dest, start, length,0,0,1);
1509                    }
1510                    if(spaceNotFound){
1511                        throw new ArabicShapingException("No spacefor lamalef");
1512                    }
1513                }
1514            }else if(lenOptionsLamAlef == LAMALEF_END){
1515                spaceNotFound = expandCompositCharAtEnd(dest, start, length, lacount);
1516                if(spaceNotFound){
1517                    throw new ArabicShapingException("No spacefor lamalef");
1518                }
1519            }else if(lenOptionsLamAlef == LAMALEF_BEGIN){
1520                spaceNotFound = expandCompositCharAtBegin(dest, start, length, lacount);
1521                if(spaceNotFound){
1522                    throw new ArabicShapingException("No spacefor lamalef");
1523                }
1524            }else if(lenOptionsLamAlef == LAMALEF_NEAR){
1525                spaceNotFound = expandCompositCharAtNear(dest, start, length,0,0,1);
1526                if(spaceNotFound){
1527                    throw new ArabicShapingException("No spacefor lamalef");
1528            }
1529            }else if(lenOptionsLamAlef == LAMALEF_RESIZE){
1530                for (int r = start + length, w = r + lacount; --r >= start;) {
1531                    char ch = dest[r];
1532                    if (isNormalizedLamAlefChar(ch)) {
1533                        dest[--w] = '\u0644';
1534                        dest[--w] = convertNormalizedLamAlef[ch - '\u065C'];
1535                    } else {
1536                        dest[--w] = ch;
1537                    }
1538                }
1539                length += lacount;
1540            }
1541            }else{
1542                if(lenOptionsSeen == SEEN_TWOCELL_NEAR){
1543                spaceNotFound = expandCompositCharAtNear(dest, start, length,0,1,0);
1544                if(spaceNotFound){
1545                    throw new ArabicShapingException("No space for Seen tail expansion");
1546                }
1547            }
1548            if(lenOptionsYehHamza == YEHHAMZA_TWOCELL_NEAR){
1549                spaceNotFound = expandCompositCharAtNear(dest, start, length,1,0,0);
1550                if(spaceNotFound){
1551                    throw new ArabicShapingException("No space for YehHamza expansion");
1552                }
1553            }
1554            }
1555        return length;
1556    }
1557
1558
1559    /* Convert the input buffer from FExx Range into 06xx Range
1560     * to put all characters into the 06xx range
1561     * even the lamalef is converted to the special region in
1562     * the 06xx range.  Return the number of lamalef chars found.
1563     */
1564    private int normalize(char[] dest, int start, int length) {
1565        int lacount = 0;
1566        for (int i = start, e = i + length; i < e; ++i) {
1567            char ch = dest[i];
1568            if (ch >= '\uFE70' && ch <= '\uFEFC') {
1569                if (isLamAlefChar(ch)) {
1570                    ++lacount;
1571                }
1572                dest[i] = (char)convertFEto06[ch - '\uFE70'];
1573            }
1574        }
1575        return lacount;
1576    }
1577
1578    /*
1579     * Name    : deshapeNormalize
1580     * Function: Convert the input buffer from FExx Range into 06xx Range
1581     *           even the lamalef is converted to the special region in the 06xx range.
1582     *           According to the options the user enters, all seen family characters
1583     *           followed by a tail character are merged to seen tail family character and
1584     *           any yeh followed by a hamza character are merged to yehhamza character.
1585     *           Method returns the number of lamalef chars found.
1586     */
1587    private int deshapeNormalize(char[] dest, int start, int length) {
1588        int lacount = 0;
1589        int yehHamzaComposeEnabled = 0;
1590        int seenComposeEnabled = 0;
1591
1592        yehHamzaComposeEnabled = ((options&YEHHAMZA_MASK) == YEHHAMZA_TWOCELL_NEAR) ? 1 : 0;
1593        seenComposeEnabled = ((options&SEEN_MASK) == SEEN_TWOCELL_NEAR)? 1 : 0;
1594
1595        for (int i = start, e = i + length; i < e; ++i) {
1596            char ch = dest[i];
1597
1598        if( (yehHamzaComposeEnabled == 1) && ((ch == HAMZA06_CHAR) || (ch == HAMZAFE_CHAR))
1599               && (i < (length - 1)) && isAlefMaksouraChar(dest[i+1] )) {
1600                dest[i] = SPACE_CHAR;
1601                dest[i+1] = YEH_HAMZA_CHAR;
1602       } else if ( (seenComposeEnabled == 1) && (isTailChar(ch)) && (i< (length - 1))
1603                       && (isSeenTailFamilyChar(dest[i+1])==1) ) {
1604               dest[i] = SPACE_CHAR;
1605       }
1606       else if (ch >= '\uFE70' && ch <= '\uFEFC') {
1607                if (isLamAlefChar(ch)) {
1608                    ++lacount;
1609                }
1610                dest[i] = (char)convertFEto06[ch - '\uFE70'];
1611            }
1612        }
1613        return lacount;
1614    }
1615
1616    /*
1617     * Name    : shapeUnicode
1618     * Function: Converts an Arabic Unicode buffer in 06xx Range into a shaped
1619     *           arabic Unicode buffer in FExx Range
1620     */
1621    private int shapeUnicode(char[] dest,
1622                             int start,
1623                             int length,
1624                             int destSize,
1625                             int tashkeelFlag)throws ArabicShapingException {
1626
1627        int lamalef_count = normalize(dest, start, length);
1628
1629        // resolve the link between the characters.
1630        // Arabic characters have four forms: Isolated, Initial, Medial and Final.
1631        // Tashkeel characters have two, isolated or medial, and sometimes only isolated.
1632        // tashkeelFlag == 0: shape normally, 1: shape isolated, 2: don't shape
1633
1634        boolean lamalef_found = false, seenfam_found = false;
1635        boolean yehhamza_found = false, tashkeel_found = false;
1636        int i = start + length - 1;
1637        int currLink = getLink(dest[i]);
1638        int nextLink = 0;
1639        int prevLink = 0;
1640        int lastLink = 0;
1641        //int prevPos = i;
1642        int lastPos = i;
1643        int nx = -2;
1644        int nw = 0;
1645
1646        while (i >= 0) {
1647            // If high byte of currLink != 0 then there might be more than one shape
1648            if ((currLink & '\uFF00') != 0 || isTashkeelChar(dest[i])) {
1649                nw = i - 1;
1650                nx = -2;
1651                while (nx < 0) { // we need to know about next char
1652                    if (nw == -1) {
1653                        nextLink = 0;
1654                        nx = Integer.MAX_VALUE;
1655                    } else {
1656                        nextLink = getLink(dest[nw]);
1657                        if ((nextLink & IRRELEVANT) == 0) {
1658                            nx = nw;
1659                        } else {
1660                            --nw;
1661                        }
1662                    }
1663                }
1664
1665                if (((currLink & ALEFTYPE) > 0) && ((lastLink & LAMTYPE) > 0)) {
1666                    lamalef_found = true;
1667                    char wLamalef = changeLamAlef(dest[i]); // get from 0x065C-0x065f
1668                    if (wLamalef != '\u0000') {
1669                        // replace alef by marker, it will be removed later
1670                        dest[i] = '\uffff';
1671                        dest[lastPos] = wLamalef;
1672                        i = lastPos;
1673                    }
1674
1675                    lastLink = prevLink;
1676                    currLink = getLink(wLamalef); // requires '\u0000', unfortunately
1677                }
1678                if ((i > 0) && (dest[i-1] == SPACE_CHAR))
1679                {
1680                    if ( isSeenFamilyChar(dest[i]) == 1){
1681                        seenfam_found = true;
1682                    } else if (dest[i] == YEH_HAMZA_CHAR) {
1683                        yehhamza_found = true;
1684                    }
1685                }
1686                else if(i==0){
1687                    if ( isSeenFamilyChar(dest[i]) == 1){
1688                        seenfam_found = true;
1689                    } else if (dest[i] == YEH_HAMZA_CHAR) {
1690                        yehhamza_found = true;
1691                    }
1692                }
1693
1694
1695                // get the proper shape according to link ability of neighbors
1696                // and of character; depends on the order of the shapes
1697                // (isolated, initial, middle, final) in the compatibility area
1698
1699                int flag = specialChar(dest[i]);
1700
1701                int shape = shapeTable[nextLink & LINK_MASK]
1702                    [lastLink & LINK_MASK]
1703                    [currLink & LINK_MASK];
1704
1705                if (flag == 1) {
1706                    shape &= 0x1;
1707                } else if (flag == 2) {
1708                    if (tashkeelFlag == 0 &&
1709                        ((lastLink & LINKL) != 0) &&
1710                        ((nextLink & LINKR) != 0) &&
1711                        dest[i] != '\u064C' &&
1712                        dest[i] != '\u064D' &&
1713                        !((nextLink & ALEFTYPE) == ALEFTYPE &&
1714                          (lastLink & LAMTYPE) == LAMTYPE)) {
1715
1716                        shape = 1;
1717
1718                    } else if(tashkeelFlag == 2 && dest[i] == SHADDA06_CHAR){
1719                        shape = 1;
1720
1721                    } else {
1722                        shape = 0;
1723                    }
1724                }
1725                if (flag == 2) {
1726                    if (tashkeelFlag == 2 && dest[i] != SHADDA06_CHAR) {
1727                        dest[i] = TASHKEEL_SPACE_SUB;
1728                        tashkeel_found = true;
1729                    }
1730                    else{
1731                        dest[i] = (char)('\uFE70' + irrelevantPos[dest[i] - '\u064B'] + shape);
1732                    }
1733                    // else leave tashkeel alone
1734                } else {
1735                    dest[i] = (char)('\uFE70' + (currLink >> 8) + shape);
1736                }
1737            }
1738
1739            // move one notch forward
1740            if ((currLink & IRRELEVANT) == 0) {
1741                prevLink = lastLink;
1742                lastLink = currLink;
1743                //prevPos = lastPos;
1744                lastPos = i;
1745            }
1746
1747            --i;
1748            if (i == nx) {
1749                currLink = nextLink;
1750                nx = -2;
1751            } else if (i != -1) {
1752                currLink = getLink(dest[i]);
1753            }
1754        }
1755
1756        // If we found a lam/alef pair in the buffer
1757        // call handleGeneratedSpaces to remove the spaces that were added
1758
1759        destSize = length;
1760        if (lamalef_found || tashkeel_found) {
1761            destSize = handleGeneratedSpaces(dest, start, length);
1762        }
1763        if (seenfam_found || yehhamza_found){
1764            destSize = expandCompositChar(dest, start, destSize, lamalef_count, SHAPE_MODE);
1765        }
1766        return destSize;
1767    }
1768
1769    /*
1770     * Name    : deShapeUnicode
1771     * Function: Converts an Arabic Unicode buffer in FExx Range into unshaped
1772     *           arabic Unicode buffer in 06xx Range
1773     */
1774    private int deShapeUnicode(char[] dest,
1775                               int start,
1776                               int length,
1777                               int destSize) throws ArabicShapingException {
1778
1779        int lamalef_count = deshapeNormalize(dest, start, length);
1780
1781        // If there was a lamalef in the buffer call expandLamAlef
1782        if (lamalef_count != 0) {
1783            // need to adjust dest to fit expanded buffer... !!!
1784            destSize = expandCompositChar(dest, start, length, lamalef_count,DESHAPE_MODE);
1785        } else {
1786            destSize = length;
1787        }
1788
1789        return destSize;
1790    }
1791
1792    private int internalShape(char[] source,
1793                              int sourceStart,
1794                              int sourceLength,
1795                              char[] dest,
1796                              int destStart,
1797                              int destSize) throws ArabicShapingException {
1798
1799        if (sourceLength == 0) {
1800            return 0;
1801        }
1802
1803        if (destSize == 0) {
1804            if (((options & LETTERS_MASK) != LETTERS_NOOP) &&
1805                ((options & LAMALEF_MASK) == LAMALEF_RESIZE)) {
1806
1807                return calculateSize(source, sourceStart, sourceLength);
1808            } else {
1809                return sourceLength; // by definition
1810            }
1811        }
1812
1813        // always use temp buffer
1814        char[] temp = new char[sourceLength * 2]; // all lamalefs requiring expansion
1815        System.arraycopy(source, sourceStart, temp, 0, sourceLength);
1816
1817        if (isLogical) {
1818            invertBuffer(temp, 0, sourceLength);
1819        }
1820
1821        int outputSize = sourceLength;
1822
1823        switch (options & LETTERS_MASK) {
1824        case LETTERS_SHAPE_TASHKEEL_ISOLATED:
1825            outputSize = shapeUnicode(temp, 0, sourceLength, destSize, 1);
1826            break;
1827
1828        case LETTERS_SHAPE:
1829            if( ((options&TASHKEEL_MASK) != 0) &&
1830                ((options&TASHKEEL_MASK) !=TASHKEEL_REPLACE_BY_TATWEEL)) {
1831                   /* Call the shaping function with tashkeel flag == 2 for removal of tashkeel */
1832                outputSize = shapeUnicode(temp, 0, sourceLength, destSize, 2);
1833                }else {
1834                   //default Call the shaping function with tashkeel flag == 1 */
1835                    outputSize = shapeUnicode(temp, 0, sourceLength, destSize, 0);
1836
1837                   /*After shaping text check if user wants to remove tashkeel and replace it with tatweel*/
1838                   if( (options&TASHKEEL_MASK) == TASHKEEL_REPLACE_BY_TATWEEL){
1839                       outputSize = handleTashkeelWithTatweel(temp,sourceLength);
1840                   }
1841               }
1842            break;
1843
1844        case LETTERS_UNSHAPE:
1845            outputSize = deShapeUnicode(temp, 0, sourceLength, destSize);
1846            break;
1847
1848        default:
1849            break;
1850        }
1851
1852        if (outputSize > destSize) {
1853            throw new ArabicShapingException("not enough room for result data");
1854        }
1855
1856        if ((options & DIGITS_MASK) != DIGITS_NOOP) {
1857            char digitBase = '\u0030'; // European digits
1858            switch (options & DIGIT_TYPE_MASK) {
1859            case DIGIT_TYPE_AN:
1860                digitBase = '\u0660';  // Arabic-Indic digits
1861                break;
1862
1863            case DIGIT_TYPE_AN_EXTENDED:
1864                digitBase = '\u06f0';  // Eastern Arabic-Indic digits (Persian and Urdu)
1865                break;
1866
1867            default:
1868                break;
1869            }
1870
1871            switch (options & DIGITS_MASK) {
1872            case DIGITS_EN2AN:
1873                {
1874                    int digitDelta = digitBase - '\u0030';
1875                    for (int i = 0; i < outputSize; ++i) {
1876                        char ch = temp[i];
1877                        if (ch <= '\u0039' && ch >= '\u0030') {
1878                            temp[i] += digitDelta;
1879                        }
1880                    }
1881                }
1882                break;
1883
1884            case DIGITS_AN2EN:
1885                {
1886                    char digitTop = (char)(digitBase + 9);
1887                    int digitDelta = '\u0030' - digitBase;
1888                    for (int i = 0; i < outputSize; ++i) {
1889                        char ch = temp[i];
1890                        if (ch <= digitTop && ch >= digitBase) {
1891                            temp[i] += digitDelta;
1892                        }
1893                    }
1894                }
1895                break;
1896
1897            case DIGITS_EN2AN_INIT_LR:
1898                shapeToArabicDigitsWithContext(temp, 0, outputSize, digitBase, false);
1899                break;
1900
1901            case DIGITS_EN2AN_INIT_AL:
1902                shapeToArabicDigitsWithContext(temp, 0, outputSize, digitBase, true);
1903                break;
1904
1905            default:
1906                break;
1907            }
1908        }
1909
1910        if (isLogical) {
1911            invertBuffer(temp, 0, outputSize);
1912        }
1913
1914        System.arraycopy(temp, 0, dest, destStart, outputSize);
1915
1916        return outputSize;
1917    }
1918}
1919