1/* GENERATED SOURCE. DO NOT MODIFY. */
2// © 2016 and later: Unicode, Inc. and others.
3// License & terms of use: http://www.unicode.org/copyright.html#License
4/*
5 *******************************************************************************
6 *
7 *   Copyright (C) 1999-2014, International Business Machines
8 *   Corporation and others.  All Rights Reserved.
9 *
10 *******************************************************************************
11 */
12
13package android.icu.lang;
14
15import android.icu.text.UTF16;
16
17/**
18 * <code>UScriptRun</code> is used to find runs of characters in
19 * the same script, as defined in the <code>UScript</code> class.
20 * It implements a simple iterator over an array of characters.
21 * The iterator will assign <code>COMMON</code> and <code>INHERITED</code>
22 * characters to the same script as the preceeding characters. If the
23 * COMMON and INHERITED characters are first, they will be assigned to
24 * the same script as the following characters.
25 *
26 * The iterator will try to match paired punctuation. If it sees an
27 * opening punctuation character, it will remember the script that
28 * was assigned to that character, and assign the same script to the
29 * matching closing punctuation.
30 *
31 * No attempt is made to combine related scripts into a single run. In
32 * particular, Hiragana, Katakana, and Han characters will appear in separate
33 * runs.
34
35 * Here is an example of how to iterate over script runs:
36 * <pre>
37 * void printScriptRuns(char[] text)
38 * {
39 *     UScriptRun scriptRun = new UScriptRun(text);
40 *
41 *     while (scriptRun.next()) {
42 *         int start  = scriptRun.getScriptStart();
43 *         int limit  = scriptRun.getScriptLimit();
44 *         int script = scriptRun.getScriptCode();
45 *
46 *         System.out.println("Script \"" + UScript.getName(script) + "\" from " +
47 *                            start + " to " + limit + ".");
48 *     }
49 *  }
50 * </pre>
51 *
52 * @deprecated This API is ICU internal only.
53 * @hide Only a subset of ICU is exposed in Android
54 * @hide draft / provisional / internal are hidden on Android
55 */
56@Deprecated
57public final class UScriptRun
58{
59    /**
60     * Construct an empty <code>UScriptRun</code> object. The <code>next()</code>
61     * method will return <code>false</code> the first time it is called.
62     *
63     * @deprecated This API is ICU internal only.
64     * @hide draft / provisional / internal are hidden on Android
65     */
66    @Deprecated
67    public UScriptRun()
68    {
69        char[] nullChars = null;
70
71        reset(nullChars, 0, 0);
72    }
73
74    /**
75     * Construct a <code>UScriptRun</code> object which iterates over the
76     * characters in the given string.
77     *
78     * @param text the string of characters over which to iterate.
79     *
80     * @deprecated This API is ICU internal only.
81     * @hide draft / provisional / internal are hidden on Android
82     */
83    @Deprecated
84    public UScriptRun(String text)
85    {
86        reset (text);
87    }
88
89    /**
90     * Construct a <code>UScriptRun</code> object which iterates over a subrange
91     * of the characetrs in the given string.
92     *
93     * @param text the string of characters over which to iterate.
94     * @param start the index of the first character over which to iterate
95     * @param count the number of characters over which to iterate
96     *
97     * @deprecated This API is ICU internal only.
98     * @hide draft / provisional / internal are hidden on Android
99     */
100    @Deprecated
101    public UScriptRun(String text, int start, int count)
102    {
103        reset(text, start, count);
104    }
105
106    /**
107     * Construct a <code>UScriptRun</code> object which iterates over the given
108     * characetrs.
109     *
110     * @param chars the array of characters over which to iterate.
111     *
112     * @deprecated This API is ICU internal only.
113     * @hide draft / provisional / internal are hidden on Android
114     */
115    @Deprecated
116    public UScriptRun(char[] chars)
117    {
118        reset(chars);
119    }
120
121    /**
122     * Construct a <code>UScriptRun</code> object which iterates over a subrange
123     * of the given characetrs.
124     *
125     * @param chars the array of characters over which to iterate.
126     * @param start the index of the first character over which to iterate
127     * @param count the number of characters over which to iterate
128     *
129     * @deprecated This API is ICU internal only.
130     * @hide draft / provisional / internal are hidden on Android
131     */
132    @Deprecated
133    public UScriptRun(char[] chars, int start, int count)
134    {
135        reset(chars, start, count);
136    }
137
138
139    /**
140     * Reset the iterator to the start of the text.
141     *
142     * @deprecated This API is ICU internal only.
143     * @hide draft / provisional / internal are hidden on Android
144     */
145    @Deprecated
146    public final void reset()
147    {
148        // empty any old parenStack contents.
149        // NOTE: this is not the most efficient way
150        // to do this, but it's the easiest to write...
151        while (stackIsNotEmpty()) {
152            pop();
153        }
154
155        scriptStart = textStart;
156        scriptLimit = textStart;
157        scriptCode  = UScript.INVALID_CODE;
158        parenSP     = -1;
159        pushCount   =  0;
160        fixupCount  =  0;
161
162        textIndex = textStart;
163    }
164
165    /**
166     * Reset the iterator to iterate over the given range of the text. Throws
167     * IllegalArgumentException if the range is outside of the bounds of the
168     * character array.
169     *
170     * @param start the index of the new first character over which to iterate
171     * @param count the new number of characters over which to iterate.
172     * @exception IllegalArgumentException If invalid arguments are passed.
173     *
174     * @deprecated This API is ICU internal only.
175     * @hide draft / provisional / internal are hidden on Android
176     */
177    @Deprecated
178    public final void reset(int start, int count)
179    throws IllegalArgumentException
180    {
181        int len = 0;
182
183        if (text != null) {
184            len = text.length;
185        }
186
187        if (start < 0 || count < 0 || start > len - count) {
188            throw new IllegalArgumentException();
189        }
190
191        textStart = start;
192        textLimit = start + count;
193
194        reset();
195    }
196
197    /**
198     * Reset the iterator to iterate over <code>count</code> characters
199     * in <code>chars</code> starting at <code>start</code>. This allows
200     * clients to reuse an iterator.
201     *
202     * @param chars the new array of characters over which to iterate.
203     * @param start the index of the first character over which to iterate.
204     * @param count the number of characters over which to iterate.
205     *
206     * @deprecated This API is ICU internal only.
207     * @hide draft / provisional / internal are hidden on Android
208     */
209    @Deprecated
210    public final void reset(char[] chars, int start, int count)
211    {
212        if (chars == null) {
213            chars = emptyCharArray;
214        }
215
216        text = chars;
217
218        reset(start, count);
219    }
220
221    /**
222     * Reset the iterator to iterate over the characters
223     * in <code>chars</code>. This allows clients to reuse an iterator.
224     *
225     * @param chars the new array of characters over which to iterate.
226     *
227     * @deprecated This API is ICU internal only.
228     * @hide draft / provisional / internal are hidden on Android
229     */
230    @Deprecated
231    public final void reset(char[] chars)
232    {
233        int length = 0;
234
235        if (chars != null) {
236            length = chars.length;
237        }
238
239        reset(chars, 0, length);
240    }
241
242    /**
243     * Reset the iterator to iterate over <code>count</code> characters
244     * in <code>text</code> starting at <code>start</code>. This allows
245     * clients to reuse an iterator.
246     *
247     * @param str the new string of characters over which to iterate.
248     * @param start the index of the first character over which to iterate.
249     * @param count the nuber of characters over which to iterate.
250     *
251     * @deprecated This API is ICU internal only.
252     * @hide draft / provisional / internal are hidden on Android
253     */
254    @Deprecated
255    public final void reset(String str, int start, int count)
256    {
257        char[] chars = null;
258
259        if (str != null) {
260            chars = str.toCharArray();
261        }
262
263        reset(chars, start, count);
264    }
265
266    /**
267     * Reset the iterator to iterate over the characters
268     * in <code>text</code>. This allows clients to reuse an iterator.
269     *
270     * @param str the new string of characters over which to iterate.
271     *
272     * @deprecated This API is ICU internal only.
273     * @hide draft / provisional / internal are hidden on Android
274     */
275    @Deprecated
276    public final void reset(String str)
277    {
278        int length   = 0;
279
280        if (str != null) {
281            length = str.length();
282        }
283
284        reset(str, 0, length);
285    }
286
287
288
289    /**
290     * Get the starting index of the current script run.
291     *
292     * @return the index of the first character in the current script run.
293     *
294     * @deprecated This API is ICU internal only.
295     * @hide draft / provisional / internal are hidden on Android
296     */
297    @Deprecated
298    public final int getScriptStart()
299    {
300        return scriptStart;
301    }
302
303    /**
304     * Get the index of the first character after the current script run.
305     *
306     * @return the index of the first character after the current script run.
307     *
308     * @deprecated This API is ICU internal only.
309     * @hide draft / provisional / internal are hidden on Android
310     */
311    @Deprecated
312    public final int getScriptLimit()
313    {
314        return scriptLimit;
315    }
316
317    /**
318     * Get the script code for the script of the current script run.
319     *
320     * @return the script code for the script of the current script run.
321     * @see android.icu.lang.UScript
322     *
323     * @deprecated This API is ICU internal only.
324     * @hide draft / provisional / internal are hidden on Android
325     */
326    @Deprecated
327    public final int getScriptCode()
328    {
329        return scriptCode;
330    }
331
332    /**
333     * Find the next script run. Returns <code>false</code> if there
334     * isn't another run, returns <code>true</code> if there is.
335     *
336     * @return <code>false</code> if there isn't another run, <code>true</code> if there is.
337     *
338     * @deprecated This API is ICU internal only.
339     * @hide draft / provisional / internal are hidden on Android
340     */
341    @Deprecated
342    public final boolean next()
343    {
344        // if we've fallen off the end of the text, we're done
345        if (scriptLimit >= textLimit) {
346            return false;
347        }
348
349        scriptCode  = UScript.COMMON;
350        scriptStart = scriptLimit;
351
352        syncFixup();
353
354        while (textIndex < textLimit) {
355            int ch = UTF16.charAt(text, textStart, textLimit, textIndex - textStart);
356            int codePointCount = UTF16.getCharCount(ch);
357            int sc = UScript.getScript(ch);
358            int pairIndex = getPairIndex(ch);
359
360            textIndex += codePointCount;
361
362            // Paired character handling:
363            //
364            // if it's an open character, push it onto the stack.
365            // if it's a close character, find the matching open on the
366            // stack, and use that script code. Any non-matching open
367            // characters above it on the stack will be poped.
368            if (pairIndex >= 0) {
369                if ((pairIndex & 1) == 0) {
370                    push(pairIndex, scriptCode);
371                } else {
372                    int pi = pairIndex & ~1;
373
374                    while (stackIsNotEmpty() && top().pairIndex != pi) {
375                        pop();
376                    }
377
378                    if (stackIsNotEmpty()) {
379                        sc = top().scriptCode;
380                    }
381                }
382            }
383
384            if (sameScript(scriptCode, sc)) {
385                if (scriptCode <= UScript.INHERITED && sc > UScript.INHERITED) {
386                    scriptCode = sc;
387
388                    fixup(scriptCode);
389                }
390
391                // if this character is a close paired character,
392                // pop the matching open character from the stack
393                if (pairIndex >= 0 && (pairIndex & 1) != 0) {
394                    pop();
395                }
396            } else {
397                // We've just seen the first character of
398                // the next run. Back over it so we'll see
399                // it again the next time.
400                textIndex -= codePointCount;
401                break;
402            }
403        }
404
405        scriptLimit = textIndex;
406        return true;
407    }
408
409    /**
410     * Compare two script codes to see if they are in the same script. If one script is
411     * a strong script, and the other is INHERITED or COMMON, it will compare equal.
412     *
413     * @param scriptOne one of the script codes.
414     * @param scriptTwo the other script code.
415     * @return <code>true</code> if the two scripts are the same.
416     * @see android.icu.lang.UScript
417     */
418    private static boolean sameScript(int scriptOne, int scriptTwo)
419    {
420        return scriptOne <= UScript.INHERITED || scriptTwo <= UScript.INHERITED || scriptOne == scriptTwo;
421    }
422
423    /*
424     * An internal class which holds entries on the paren stack.
425     */
426    private static final class ParenStackEntry
427    {
428        int pairIndex;
429        int scriptCode;
430
431        public ParenStackEntry(int thePairIndex, int theScriptCode)
432        {
433            pairIndex  = thePairIndex;
434            scriptCode = theScriptCode;
435        }
436    }
437
438    private static final int mod(int sp)
439    {
440        return sp % PAREN_STACK_DEPTH;
441    }
442
443    private static final int inc(int sp, int count)
444    {
445        return mod(sp + count);
446    }
447
448    private static final int inc(int sp)
449    {
450        return inc(sp, 1);
451    }
452
453    private static final int dec(int sp, int count)
454    {
455        return mod(sp + PAREN_STACK_DEPTH - count);
456    }
457
458    private static final int dec(int sp)
459    {
460        return dec(sp, 1);
461    }
462
463    private static final int limitInc(int count)
464    {
465        if (count < PAREN_STACK_DEPTH) {
466            count += 1;
467        }
468
469        return count;
470    }
471
472    private final boolean stackIsEmpty()
473    {
474        return pushCount <= 0;
475    }
476
477    private final boolean stackIsNotEmpty()
478    {
479        return ! stackIsEmpty();
480    }
481
482    private final void push(int pairIndex, int scrptCode)
483    {
484        pushCount  = limitInc(pushCount);
485        fixupCount = limitInc(fixupCount);
486
487        parenSP = inc(parenSP);
488        parenStack[parenSP] = new ParenStackEntry(pairIndex, scrptCode);
489    }
490
491    private final void pop()
492    {
493
494        if (stackIsEmpty()) {
495            return;
496        }
497
498        parenStack[parenSP] = null;
499
500        if (fixupCount > 0) {
501            fixupCount -= 1;
502        }
503
504        pushCount -= 1;
505        parenSP = dec(parenSP);
506
507        // If the stack is now empty, reset the stack
508        // pointers to their initial values.
509        if (stackIsEmpty()) {
510            parenSP = -1;
511        }
512    }
513
514    private final ParenStackEntry top()
515    {
516        return parenStack[parenSP];
517    }
518
519    private final void syncFixup()
520    {
521        fixupCount = 0;
522    }
523
524    private final void fixup(int scrptCode)
525    {
526        int fixupSP = dec(parenSP, fixupCount);
527
528        while (fixupCount-- > 0) {
529            fixupSP = inc(fixupSP);
530            parenStack[fixupSP].scriptCode = scrptCode;
531        }
532    }
533
534    private char[] emptyCharArray = {};
535
536    private char[] text;
537
538    private int textIndex;
539    private int  textStart;
540    private int  textLimit;
541
542    private int  scriptStart;
543    private int  scriptLimit;
544    private int  scriptCode;
545
546    private static int PAREN_STACK_DEPTH = 32;
547    private static ParenStackEntry parenStack[] = new ParenStackEntry[PAREN_STACK_DEPTH];
548    private int parenSP = -1;
549    private int pushCount = 0;
550    private int fixupCount = 0;
551
552    /**
553     * Find the highest bit that's set in a word. Uses a binary search through
554     * the bits.
555     *
556     * @param n the word in which to find the highest bit that's set.
557     * @return the bit number (counting from the low order bit) of the highest bit.
558     */
559    private static final byte highBit(int n)
560    {
561        if (n <= 0) {
562            return -32;
563        }
564
565        byte bit = 0;
566
567        if (n >= 1 << 16) {
568            n >>= 16;
569            bit += 16;
570        }
571
572        if (n >= 1 << 8) {
573            n >>= 8;
574            bit += 8;
575        }
576
577        if (n >= 1 << 4) {
578            n >>= 4;
579            bit += 4;
580        }
581
582        if (n >= 1 << 2) {
583            n >>= 2;
584            bit += 2;
585        }
586
587        if (n >= 1 << 1) {
588            n >>= 1;
589            bit += 1;
590        }
591
592        return bit;
593    }
594
595    /**
596     * Search the pairedChars array for the given character.
597     *
598     * @param ch the character for which to search.
599     * @return the index of the character in the table, or -1 if it's not there.
600     */
601    private static int getPairIndex(int ch)
602    {
603        int probe = pairedCharPower;
604        int index = 0;
605
606        if (ch >= pairedChars[pairedCharExtra]) {
607            index = pairedCharExtra;
608        }
609
610        while (probe > (1 << 0)) {
611            probe >>= 1;
612
613            if (ch >= pairedChars[index + probe]) {
614                index += probe;
615            }
616        }
617
618        if (pairedChars[index] != ch) {
619            index = -1;
620        }
621
622        return index;
623    }
624
625    private static int pairedChars[] = {
626        0x0028, 0x0029, // ascii paired punctuation
627        0x003c, 0x003e,
628        0x005b, 0x005d,
629        0x007b, 0x007d,
630        0x00ab, 0x00bb, // guillemets
631        0x2018, 0x2019, // general punctuation
632        0x201c, 0x201d,
633        0x2039, 0x203a,
634        0x3008, 0x3009, // chinese paired punctuation
635        0x300a, 0x300b,
636        0x300c, 0x300d,
637        0x300e, 0x300f,
638        0x3010, 0x3011,
639        0x3014, 0x3015,
640        0x3016, 0x3017,
641        0x3018, 0x3019,
642        0x301a, 0x301b
643    };
644
645    private static int pairedCharPower = 1 << highBit(pairedChars.length);
646    private static int pairedCharExtra = pairedChars.length - pairedCharPower;
647}
648
649