/* GENERATED SOURCE. DO NOT MODIFY. */
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
/*
 * Copyright (C) 1998-2007 International Business Machines Corporation and
 * Unicode, Inc. All Rights Reserved.<br>
 * The Unicode Consortium makes no expressed or implied warranty of any
 * kind, and assumes no liability for errors or omissions.
 * No liability is assumed for incidental and consequential damages
 * in connection with or arising out of the use of the information here.
 */
12aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffinpackage android.icu.dev.test.normalizer;
13aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin
14aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffinimport java.util.BitSet;
15aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin
16aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffinimport android.icu.dev.test.UTF16Util;
17aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin
18aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin/**
19aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin * Accesses the Normalization Data used for Forms C and D.<br>
20aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin * @author Mark Davis
21aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin * Updates for supplementary code points:
22aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin * Vladimir Weinstein & Markus Scherer
23aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin */
24aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffinpublic class NormalizerData {
25aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin//    static final String copyright = "Copyright (C) 1998-2003 International Business Machines Corporation and Unicode, Inc.";
26aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin
27aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin    /**
28aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin    * Constant for use in getPairwiseComposition
29aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin    */
30aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin    public static final int NOT_COMPOSITE = '\uFFFF';
31aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin
32aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin    /**
33aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin    * Gets the combining class of a character from the
34aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin    * Unicode Character Database.
35aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin    * @param   ch      the source character
36aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin    * @return          value from 0 to 255
37aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin    */
38aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin    public int getCanonicalClass(int ch) {
39aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin        return canonicalClass.get(ch);
40aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin    }
41aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin
42aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin    /**
43aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin    * Returns the composite of the two characters. If the two
44aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin    * characters don't combine, returns NOT_COMPOSITE.
45aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin    * @param   first   first character (e.g. 'c')
46aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin    * @param   second  second character (e.g. \u0327 cedilla)
47aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin    * @return          composite (e.g. \u00C7 c cedilla)
48aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin    */
49aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin    public int getPairwiseComposition(int first, int second) {
50aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin        return compose.get(((long)first << 32) | second);
51aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin    }
52aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin
53aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin
54aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin    /**
55aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin    * Gets recursive decomposition of a character from the
56aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin    * Unicode Character Database.
57aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin    * @param   canonical    If true
58aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin    *                  bit is on in this byte, then selects the recursive
59aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin    *                  canonical decomposition, otherwise selects
60aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin    *                  the recursive compatibility and canonical decomposition.
61aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin    * @param   ch      the source character
62aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin    * @param   buffer  buffer to be filled with the decomposition
63aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin    */
64aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin    public void getRecursiveDecomposition(boolean canonical, int ch, StringBuffer buffer) {
65aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin        String decomp = decompose.get(ch);
66aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin        if (decomp != null && !(canonical && isCompatibility.get(ch))) {
67aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin            for (int i = 0; i < decomp.length(); i+=UTF16Util.codePointLength(ch)) {
68aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin                ch = UTF16Util.nextCodePoint(decomp, i);
69aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin                getRecursiveDecomposition(canonical, ch, buffer);
70aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin            }
71aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin        } else {                    // if no decomp, append
72aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin            UTF16Util.appendCodePoint(buffer, ch);
73aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin        }
74aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin    }
75aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin
76aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin    // =================================================
77aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin    //                   PRIVATES
78aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin    // =================================================
79aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin
80aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin    /**
81aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin     * Only accessed by NormalizerBuilder.
82aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin     */
83aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin    NormalizerData(IntHashtable canonicalClass, IntStringHashtable decompose,
84aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin      LongHashtable compose, BitSet isCompatibility, BitSet isExcluded) {
85aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin        this.canonicalClass = canonicalClass;
86aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin        this.decompose = decompose;
87aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin        this.compose = compose;
88aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin        this.isCompatibility = isCompatibility;
89aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin        this.isExcluded = isExcluded;
90aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin    }
91aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin
92aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin    /**
93aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin    * Just accessible for testing.
94aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin    */
95aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin    boolean getExcluded (char ch) {
96aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin        return isExcluded.get(ch);
97aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin    }
98aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin
99aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin    /**
100aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin    * Just accessible for testing.
101aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin    */
102aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin    String getRawDecompositionMapping (char ch) {
103aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin        return decompose.get(ch);
104aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin    }
105aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin
106aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin    /**
107aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin    * For now, just use IntHashtable
108aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin    * Two-stage tables would be used in an optimized implementation.
109aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin    */
110aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin    private IntHashtable canonicalClass;
111aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin
112aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin    /**
113aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin    * The main data table maps chars to a 32-bit int.
114aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin    * It holds either a pair: top = first, bottom = second
115aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin    * or singleton: top = 0, bottom = single.
116aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin    * If there is no decomposition, the value is 0.
117aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin    * Two-stage tables would be used in an optimized implementation.
118aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin    * An optimization could also map chars to a small index, then use that
119aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin    * index in a small array of ints.
120aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin    */
121aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin    private IntStringHashtable decompose;
122aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin
123aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin    /**
124aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin    * Maps from pairs of characters to single.
125aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin    * If there is no decomposition, the value is NOT_COMPOSITE.
126aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin    */
127aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin    private LongHashtable compose;
128aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin
129aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin    /**
130aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin    * Tells whether decomposition is canonical or not.
131aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin    */
132aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin    private BitSet isCompatibility = new BitSet();
133aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin
134aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin    /**
135aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin    * Tells whether character is script-excluded or not.
136aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin    * Used only while building, and for testing.
137aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin    */
138aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin
139aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin    private BitSet isExcluded = new BitSet();
140aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin}
141