1aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin/* GENERATED SOURCE. DO NOT MODIFY. */ 2f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert// © 2016 and later: Unicode, Inc. and others. 3f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert// License & terms of use: http://www.unicode.org/copyright.html#License 4aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin/* 5aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin * Copyright (C) 1998-2007 International Business Machines Corporation and 6aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin * Unicode, Inc. All Rights Reserved.<br> 7aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin * The Unicode Consortium makes no expressed or implied warranty of any 8aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin * kind, and assumes no liability for errors or omissions. 9aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin * No liability is assumed for incidental and consequential damages 10aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin * in connection with or arising out of the use of the information here. 11aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin */ 12aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffinpackage android.icu.dev.test.normalizer; 13aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin 14aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffinimport java.util.BitSet; 15aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin 16aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffinimport android.icu.dev.test.UTF16Util; 17aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin 18aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin/** 19aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin * Accesses the Normalization Data used for Forms C and D.<br> 20aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin * @author Mark Davis 21aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin * Updates for supplementary code points: 22aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin * Vladimir Weinstein & Markus Scherer 23aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin */ 24aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffinpublic class NormalizerData { 25aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin// static final String copyright = "Copyright (C) 1998-2003 International Business Machines Corporation and Unicode, Inc."; 26aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin 27aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin /** 28aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin * Constant for use in getPairwiseComposition 29aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin */ 30aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin public static final int NOT_COMPOSITE = '\uFFFF'; 31aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin 32aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin /** 33aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin * Gets the combining class of a character from the 34aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin * Unicode Character Database. 35aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin * @param ch the source character 36aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin * @return value from 0 to 255 37aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin */ 38aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin public int getCanonicalClass(int ch) { 39aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin return canonicalClass.get(ch); 40aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin } 41aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin 42aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin /** 43aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin * Returns the composite of the two characters. If the two 44aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin * characters don't combine, returns NOT_COMPOSITE. 45aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin * @param first first character (e.g. 'c') 46aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin * @param second second character (e.g. \u0327 cedilla) 47aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin * @return composite (e.g. \u00C7 c cedilla) 48aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin */ 49aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin public int getPairwiseComposition(int first, int second) { 50aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin return compose.get(((long)first << 32) | second); 51aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin } 52aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin 53aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin 54aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin /** 55aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin * Gets recursive decomposition of a character from the 56aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin * Unicode Character Database. 57aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin * @param canonical If true 58aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin * bit is on in this byte, then selects the recursive 59aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin * canonical decomposition, otherwise selects 60aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin * the recursive compatibility and canonical decomposition. 61aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin * @param ch the source character 62aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin * @param buffer buffer to be filled with the decomposition 63aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin */ 64aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin public void getRecursiveDecomposition(boolean canonical, int ch, StringBuffer buffer) { 65aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin String decomp = decompose.get(ch); 66aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin if (decomp != null && !(canonical && isCompatibility.get(ch))) { 67aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin for (int i = 0; i < decomp.length(); i+=UTF16Util.codePointLength(ch)) { 68aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin ch = UTF16Util.nextCodePoint(decomp, i); 69aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin getRecursiveDecomposition(canonical, ch, buffer); 70aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin } 71aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin } else { // if no decomp, append 72aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin UTF16Util.appendCodePoint(buffer, ch); 73aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin } 74aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin } 75aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin 76aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin // ================================================= 77aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin // PRIVATES 78aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin // ================================================= 79aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin 80aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin /** 81aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin * Only accessed by NormalizerBuilder. 82aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin */ 83aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin NormalizerData(IntHashtable canonicalClass, IntStringHashtable decompose, 84aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin LongHashtable compose, BitSet isCompatibility, BitSet isExcluded) { 85aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin this.canonicalClass = canonicalClass; 86aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin this.decompose = decompose; 87aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin this.compose = compose; 88aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin this.isCompatibility = isCompatibility; 89aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin this.isExcluded = isExcluded; 90aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin } 91aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin 92aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin /** 93aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin * Just accessible for testing. 94aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin */ 95aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin boolean getExcluded (char ch) { 96aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin return isExcluded.get(ch); 97aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin } 98aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin 99aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin /** 100aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin * Just accessible for testing. 101aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin */ 102aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin String getRawDecompositionMapping (char ch) { 103aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin return decompose.get(ch); 104aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin } 105aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin 106aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin /** 107aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin * For now, just use IntHashtable 108aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin * Two-stage tables would be used in an optimized implementation. 109aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin */ 110aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin private IntHashtable canonicalClass; 111aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin 112aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin /** 113aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin * The main data table maps chars to a 32-bit int. 114aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin * It holds either a pair: top = first, bottom = second 115aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin * or singleton: top = 0, bottom = single. 116aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin * If there is no decomposition, the value is 0. 117aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin * Two-stage tables would be used in an optimized implementation. 118aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin * An optimization could also map chars to a small index, then use that 119aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin * index in a small array of ints. 120aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin */ 121aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin private IntStringHashtable decompose; 122aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin 123aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin /** 124aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin * Maps from pairs of characters to single. 125aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin * If there is no decomposition, the value is NOT_COMPOSITE. 126aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin */ 127aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin private LongHashtable compose; 128aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin 129aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin /** 130aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin * Tells whether decomposition is canonical or not. 131aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin */ 132aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin private BitSet isCompatibility = new BitSet(); 133aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin 134aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin /** 135aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin * Tells whether character is script-excluded or not. 136aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin * Used only while building, and for testing. 137aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin */ 138aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin 139aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin private BitSet isExcluded = new BitSet(); 140aefe4d1f8f1773ead1a52f7a5d2c9e0009353600Paul Duffin} 141