Normalizer2Impl.java revision 1537b2f39245c07b00aa78c3600f7aebcb172490
12ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller/* GENERATED SOURCE. DO NOT MODIFY. */ 22ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller/* 32ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ******************************************************************************* 42ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Copyright (C) 2009-2015, International Business Machines 52ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Corporation and others. All Rights Reserved. 62ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ******************************************************************************* 72ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 82ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 92ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerpackage android.icu.impl; 102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport java.io.IOException; 122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport java.nio.ByteBuffer; 132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport java.util.ArrayList; 142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport java.util.Iterator; 152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.text.UTF16; 172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.text.UnicodeSet; 182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.util.ICUUncheckedIOException; 192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.util.VersionInfo; 202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 211537b2f39245c07b00aa78c3600f7aebcb172490Neil Fuller/** 221537b2f39245c07b00aa78c3600f7aebcb172490Neil Fuller * @hide Only a subset of ICU is exposed in Android 231537b2f39245c07b00aa78c3600f7aebcb172490Neil Fuller * @hide All android.icu classes are currently hidden 24836e6b40a94ec3fb7545a76cb072960442b7eee9Neil Fuller */ 
252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerpublic final class Normalizer2Impl { 262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static final class Hangul { 272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* Korean Hangul and Jamo constants */ 282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static final int JAMO_L_BASE=0x1100; /* "lead" jamo */ 292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static final int JAMO_L_END=0x1112; 302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static final int JAMO_V_BASE=0x1161; /* "vowel" jamo */ 312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static final int JAMO_V_END=0x1175; 322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static final int JAMO_T_BASE=0x11a7; /* "trail" jamo */ 332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static final int JAMO_T_END=0x11c2; 342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static final int HANGUL_BASE=0xac00; 362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static final int HANGUL_END=0xd7a3; 372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static final int JAMO_L_COUNT=19; 392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static final int JAMO_V_COUNT=21; 402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static final int JAMO_T_COUNT=28; 412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static final int JAMO_L_LIMIT=JAMO_L_BASE+JAMO_L_COUNT; 432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static final int JAMO_V_LIMIT=JAMO_V_BASE+JAMO_V_COUNT; 442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static final int JAMO_VT_COUNT=JAMO_V_COUNT*JAMO_T_COUNT; 
462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static final int HANGUL_COUNT=JAMO_L_COUNT*JAMO_V_COUNT*JAMO_T_COUNT; 482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static final int HANGUL_LIMIT=HANGUL_BASE+HANGUL_COUNT; 492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static boolean isHangul(int c) { 512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return HANGUL_BASE<=c && c<HANGUL_LIMIT; 522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static boolean isHangulWithoutJamoT(char c) { 542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c-=HANGUL_BASE; 552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return c<HANGUL_COUNT && c%JAMO_T_COUNT==0; 562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static boolean isJamoL(int c) { 582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return JAMO_L_BASE<=c && c<JAMO_L_LIMIT; 592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static boolean isJamoV(int c) { 612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return JAMO_V_BASE<=c && c<JAMO_V_LIMIT; 622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Decomposes c, which must be a Hangul syllable, into buffer 662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * and returns the length of the decomposition (2 or 3). 
672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static int decompose(int c, Appendable buffer) { 692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller try { 702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c-=HANGUL_BASE; 712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int c2=c%JAMO_T_COUNT; 722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c/=JAMO_T_COUNT; 732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer.append((char)(JAMO_L_BASE+c/JAMO_V_COUNT)); 742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer.append((char)(JAMO_V_BASE+c%JAMO_V_COUNT)); 752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(c2==0) { 762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return 2; 772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer.append((char)(JAMO_T_BASE+c2)); 792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return 3; 802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } catch(IOException e) { 822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Will not occur because we do not write to I/O. 832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new ICUUncheckedIOException(e); 842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Decomposes c, which must be a Hangul syllable, into buffer. 892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * This is the raw, not recursive, decomposition. Its length is always 2. 
902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static void getRawDecomposition(int c, Appendable buffer) { 922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller try { 932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int orig=c; 942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c-=HANGUL_BASE; 952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int c2=c%JAMO_T_COUNT; 962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(c2==0) { 972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c/=JAMO_T_COUNT; 982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer.append((char)(JAMO_L_BASE+c/JAMO_V_COUNT)); 992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer.append((char)(JAMO_V_BASE+c%JAMO_V_COUNT)); 1002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 1012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer.append((char)(orig-c2)); // LV syllable 1022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer.append((char)(JAMO_T_BASE+c2)); 1032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } catch(IOException e) { 1052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Will not occur because we do not write to I/O. 1062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new ICUUncheckedIOException(e); 1072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 1112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 1122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Writable buffer that takes care of canonical ordering. 1132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Its Appendable methods behave like the C++ implementation's 1142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * appendZeroCC() methods. 
1152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <p> 1162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * If dest is a StringBuilder, then the buffer writes directly to it. 1172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Otherwise, the buffer maintains a StringBuilder for intermediate text segments 1182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * until no further changes are necessary and whole segments are appended. 1192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * append() methods that take combining-class values always write to the StringBuilder. 1202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Other append() methods flush and append to the Appendable. 1212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 1222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static final class ReorderingBuffer implements Appendable { 1232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public ReorderingBuffer(Normalizer2Impl ni, Appendable dest, int destCapacity) { 1242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller impl=ni; 1252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller app=dest; 1262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(app instanceof StringBuilder) { 1272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller appIsStringBuilder=true; 1282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller str=(StringBuilder)dest; 1292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // In Java, the constructor subsumes public void init(int destCapacity) { 1302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller str.ensureCapacity(destCapacity); 1312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller reorderStart=0; 1322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(str.length()==0) { 1332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller lastCC=0; 1342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 1352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller setIterator(); 
1362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller lastCC=previousCC(); 1372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Set reorderStart after the last code point with cc<=1 if there is one. 1382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(lastCC>1) { 1392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while(previousCC()>1) {} 1402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller reorderStart=codePointLimit; 1422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 1442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller appIsStringBuilder=false; 1452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller str=new StringBuilder(); 1462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller reorderStart=0; 1472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller lastCC=0; 1482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 1512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public boolean isEmpty() { return str.length()==0; } 1522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public int length() { return str.length(); } 1532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public int getLastCC() { return lastCC; } 1542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 1552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public StringBuilder getStringBuilder() { return str; } 1562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 1572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public boolean equals(CharSequence s, int start, int limit) { 1582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return UTF16Plus.equal(str, 0, str.length(), s, start, limit); 1592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 
1612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // For Hangul composition, replacing the Leading consonant Jamo with the syllable. 1622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public void setLastChar(char c) { 1632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller str.setCharAt(str.length()-1, c); 1642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 1662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public void append(int c, int cc) { 1672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(lastCC<=cc || cc==0) { 1682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller str.appendCodePoint(c); 1692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller lastCC=cc; 1702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(cc<=1) { 1712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller reorderStart=str.length(); 1722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 1742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller insert(c, cc); 1752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // s must be in NFD, otherwise change the implementation. 
1782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public void append(CharSequence s, int start, int limit, 1792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int leadCC, int trailCC) { 1802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(start==limit) { 1812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return; 1822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(lastCC<=leadCC || leadCC==0) { 1842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(trailCC<=1) { 1852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller reorderStart=str.length()+(limit-start); 1862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(leadCC<=1) { 1872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller reorderStart=str.length()+1; // Ok if not a code point boundary. 1882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller str.append(s, start, limit); 1902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller lastCC=trailCC; 1912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 1922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int c=Character.codePointAt(s, start); 1932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller start+=Character.charCount(c); 1942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller insert(c, leadCC); // insert first code point 1952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while(start<limit) { 1962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c=Character.codePointAt(s, start); 1972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller start+=Character.charCount(c); 1982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(start<limit) { 1992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // s must be in NFD, otherwise we need to use getCC(). 
2002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller leadCC=getCCFromYesOrMaybe(impl.getNorm16(c)); 2012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 2022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller leadCC=trailCC; 2032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller append(c, leadCC); 2052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // The following append() methods work like C++ appendZeroCC(). 2092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // They assume that the cc or trailCC of their input is 0. 2102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Most of them implement Appendable interface methods. 2112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // @Override when we switch to Java 6 2122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public ReorderingBuffer append(char c) { 2132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller str.append(c); 2142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller lastCC=0; 2152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller reorderStart=str.length(); 2162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return this; 2172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public void appendZeroCC(int c) { 2192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller str.appendCodePoint(c); 2202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller lastCC=0; 2212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller reorderStart=str.length(); 2222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // @Override when we switch to Java 6 2242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public ReorderingBuffer append(CharSequence s) { 
2252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(s.length()!=0) { 2262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller str.append(s); 2272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller lastCC=0; 2282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller reorderStart=str.length(); 2292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return this; 2312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // @Override when we switch to Java 6 2332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public ReorderingBuffer append(CharSequence s, int start, int limit) { 2342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(start!=limit) { 2352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller str.append(s, start, limit); 2362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller lastCC=0; 2372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller reorderStart=str.length(); 2382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return this; 2402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 2422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Flushes from the intermediate StringBuilder to the Appendable, 2432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * if they are different objects. 2442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Used after recomposition. 2452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Must be called at the end when writing to a non-StringBuilder Appendable. 
2462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 2472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public void flush() { 2482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(appIsStringBuilder) { 2492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller reorderStart=str.length(); 2502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 2512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller try { 2522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller app.append(str); 2532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller str.setLength(0); 2542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller reorderStart=0; 2552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } catch(IOException e) { 2562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new ICUUncheckedIOException(e); // Avoid declaring "throws IOException". 2572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller lastCC=0; 2602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 2622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Flushes from the intermediate StringBuilder to the Appendable, 2632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * if they are different objects. 2642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Then appends the new text to the Appendable or StringBuilder. 2652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Normally used after quick check loops find a non-empty sequence. 
2662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 2672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public ReorderingBuffer flushAndAppendZeroCC(CharSequence s, int start, int limit) { 2682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(appIsStringBuilder) { 2692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller str.append(s, start, limit); 2702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller reorderStart=str.length(); 2712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 2722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller try { 2732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller app.append(str).append(s, start, limit); 2742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller str.setLength(0); 2752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller reorderStart=0; 2762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } catch(IOException e) { 2772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new ICUUncheckedIOException(e); // Avoid declaring "throws IOException". 
2782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller lastCC=0; 2812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return this; 2822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public void remove() { 2842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller str.setLength(0); 2852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller lastCC=0; 2862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller reorderStart=0; 2872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public void removeSuffix(int suffixLength) { 2892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int oldLength=str.length(); 2902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller str.delete(oldLength-suffixLength, oldLength); 2912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller lastCC=0; 2922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller reorderStart=str.length(); 2932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* 2962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * TODO: Revisit whether it makes sense to track reorderStart. 2972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * It is set to after the last known character with cc<=1, 2982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * which stops previousCC() before it reads that character and looks up its cc. 2992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * previousCC() is normally only called from insert(). 3002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * In other words, reorderStart speeds up the insertion of a combining mark 3012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * into a multi-combining mark sequence where it does not belong at the end. 
3022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * This might not be worth the trouble. 3032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * On the other hand, it's not a huge amount of trouble. 3042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 3052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * We probably need it for UNORM_SIMPLE_APPEND. 3062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 3072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Inserts c somewhere before the last character. 3092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Requires 0<cc<lastCC which implies reorderStart<limit. 3102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private void insert(int c, int cc) { 3112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for(setIterator(), skipPrevious(); previousCC()>cc;) {} 3122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // insert c at codePointLimit, after the character with prevCC<=cc 3132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(c<=0xffff) { 3142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller str.insert(codePointLimit, (char)c); 3152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(cc<=1) { 3162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller reorderStart=codePointLimit+1; 3172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 3192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller str.insert(codePointLimit, Character.toChars(c)); 3202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(cc<=1) { 3212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller reorderStart=codePointLimit+2; 3222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 
private final Normalizer2Impl impl; 3272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private final Appendable app; 3282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private final StringBuilder str; 3292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private final boolean appIsStringBuilder; 3302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int reorderStart; 3312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int lastCC; 3322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // private backward iterator 3342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private void setIterator() { codePointStart=str.length(); } 3352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private void skipPrevious() { // Requires 0<codePointStart. 3362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller codePointLimit=codePointStart; 3372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller codePointStart=str.offsetByCodePoints(codePointStart, -1); 3382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int previousCC() { // Returns 0 if there is no previous character. 
3402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller codePointLimit=codePointStart; 3412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(reorderStart>=codePointStart) { 3422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return 0; 3432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int c=str.codePointBefore(codePointStart); 3452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller codePointStart-=Character.charCount(c); 3462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(c<MIN_CCC_LCCC_CP) { 3472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return 0; 3482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return getCCFromYesOrMaybe(impl.getNorm16(c)); 3502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int codePointStart, codePointLimit; 3532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // TODO: Propose as public API on the UTF16 class. 3562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // TODO: Propose widening UTF16 methods that take char to take int. 3572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // TODO: Propose widening UTF16 methods that take String to take CharSequence. 3582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static final class UTF16Plus { 3592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 3602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Assuming c is a surrogate code point (UTF16.isSurrogate(c)), 3612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * is it a lead surrogate? 
* @param c code unit or code point
* @return true or false
*/
public static boolean isSurrogateLead(int c) {
    // Within the surrogate range, bit 0x400 is clear for lead and set for trail code units.
    return 0==(c&0x400);
}

/**
 * Checks whether two CharSequence objects hold exactly the same chars.
 * @param s1 first sequence
 * @param s2 second sequence
 * @return true if s1 contains the same text as s2
 */
public static boolean equal(CharSequence s1, CharSequence s2) {
    if(s1==s2) {
        return true;  // same object
    }
    final int count=s1.length();
    if(s2.length()!=count) {
        return false;
    }
    int index=0;
    while(index<count && s1.charAt(index)==s2.charAt(index)) {
        ++index;
    }
    // We reached the end only if no mismatch stopped the scan.
    return index==count;
}

/**
 * Checks whether two CharSequence subsequences hold exactly the same chars.
 * @param s1 first sequence
 * @param start1 start offset in first sequence
 * @param limit1 limit offset in first sequence
 * @param s2 second sequence
 * @param start2 start offset in second sequence
 * @param limit2 limit offset in second sequence
 * @return true if s1.subSequence(start1, limit1) contains the same text
 *              as s2.subSequence(start2, limit2)
 */
public static boolean equal(CharSequence s1, int start1, int limit1,
                            CharSequence s2, int start2, int limit2) {
    if((limit1-start1)!=(limit2-start2)) {
        return false;  // different lengths
    }
    if(s1==s2 && start1==start2) {
        return true;  // identical range of the same object
    }
    for(int i1=start1, i2=start2; i1<limit1; ++i1, ++i2) {
        if(s1.charAt(i1)!=s2.charAt(i2)) {
            return false;
        }
    }
    return true;
}
}  // closes the enclosing nested class whose header precedes this section

/** Creates an empty instance; the data is filled in by one of the load() methods. */
public Normalizer2Impl() {
}

/** Accepts only data whose first version byte is 2. */
private static final class IsAcceptable implements ICUBinary.Authenticate {
    // @Override when we switch to Java 6
    public boolean isDataVersionAcceptable(byte[] version) {
        return 2==version[0];
    }
}
private static final IsAcceptable IS_ACCEPTABLE=new IsAcceptable();
private static final int DATA_FORMAT=0x4e726d32;  // "Nrm2"

/**
 * Reads the normalization data from the given buffer into this instance:
 * the header, the indexes, the normTrie, the composition/mapping data,
 * and smallFCD[]; then builds tccc180[].
 * @param bytes the serialized data, positioned at the data header
 * @return this
 * @throws ICUUncheckedIOException if there are too few indexes, if the trie is longer
 *         than the space reserved for it, or if reading fails with an IOException
 */
public Normalizer2Impl load(ByteBuffer bytes) {
    try {
        dataVersion=ICUBinary.readHeaderAndDataVersion(bytes, DATA_FORMAT, IS_ACCEPTABLE);

        // The first int is inIndexes[IX_NORM_TRIE_OFFSET]; divided by 4 it is the number of indexes.
        final int indexCount=bytes.getInt()/4;
        if(indexCount<=IX_MIN_MAYBE_YES) {
            throw new ICUUncheckedIOException("Normalizer2 data: not enough indexes");
        }
        final int[] indexes=new int[indexCount];
        indexes[0]=indexCount*4;
        for(int i=1; i<indexCount; ++i) {
            indexes[i]=bytes.getInt();
        }

        minDecompNoCP=indexes[IX_MIN_DECOMP_NO_CP];
        minCompNoMaybeCP=indexes[IX_MIN_COMP_NO_MAYBE_CP];

        minYesNo=indexes[IX_MIN_YES_NO];
        minYesNoMappingsOnly=indexes[IX_MIN_YES_NO_MAPPINGS_ONLY];
        minNoNo=indexes[IX_MIN_NO_NO];
        limitNoNo=indexes[IX_LIMIT_NO_NO];
        minMaybeYes=indexes[IX_MIN_MAYBE_YES];

        // Read the normTrie.
        final int trieStart=indexes[IX_NORM_TRIE_OFFSET];
        final int extraStart=indexes[IX_EXTRA_DATA_OFFSET];
        normTrie=Trie2_16.createFromSerialized(bytes);
        final int trieLength=normTrie.getSerializedLength();
        final int trieSpace=extraStart-trieStart;
        if(trieLength>trieSpace) {
            throw new ICUUncheckedIOException("Normalizer2 data: not enough bytes for normTrie");
        }
        ICUBinary.skipBytes(bytes, trieSpace-trieLength);  // skip padding after trie bytes

        // Read the composition and mapping data.
        final int smallFCDStart=indexes[IX_SMALL_FCD_OFFSET];
        final int numChars=(smallFCDStart-extraStart)/2;
        if(numChars!=0) {
            maybeYesCompositions=ICUBinary.getString(bytes, numChars, 0);
            extraData=maybeYesCompositions.substring(MIN_NORMAL_MAYBE_YES-minMaybeYes);
        }

        // smallFCD: new in formatVersion 2
        smallFCD=new byte[0x100];
        bytes.get(smallFCD);

        // Build tccc180[].
        // gennorm2 enforces lccc=0 for c<MIN_CCC_LCCC_CP=U+0300.
        tccc180=new int[0x180];
        int bits=0;
        int c=0;
        while(c<0x180) {
            if((c&0xff)==0) {
                bits=smallFCD[c>>8];  // one byte per 0x100 code points
            }
            if((bits&1)==0) {
                // This group of 0x20 code points keeps its zero entries.
                c+=0x20;
            } else {
                for(int groupLimit=c+0x20; c<groupLimit; ++c) {
                    tccc180[c]=getFCD16FromNormData(c)&0xff;
                }
            }
            bits>>=1;  // next bit is for the next group of 0x20 code points
        }

        return this;
    } catch(IOException e) {
        throw new ICUUncheckedIOException(e);
    }
}

/**
 * Loads the normalization data for the given name via ICUBinary.getRequiredData().
 * @param name passed through to ICUBinary.getRequiredData()
 * @return this
 */
public Normalizer2Impl load(String name) {
    return load(ICUBinary.getRequiredData(name));
}
4972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private void enumLcccRange(int start, int end, int norm16, UnicodeSet set) { 4992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(isAlgorithmicNoNo(norm16)) { 5002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Range of code points with same-norm16-value algorithmic decompositions. 5012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // They might have different non-zero FCD16 values. 5022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller do { 5032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int fcd16=getFCD16(start); 5042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(fcd16>0xff) { set.add(start); } 5052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } while(++start<=end); 5062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 5072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int fcd16=getFCD16(start); 5082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(fcd16>0xff) { set.add(start, end); } 5092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 5122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private void enumNorm16PropertyStartsRange(int start, int end, int value, UnicodeSet set) { 5132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* add the start code point to the USet */ 5142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller set.add(start); 5152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(start!=end && isAlgorithmicNoNo(value)) { 5162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Range of code points with same-norm16-value algorithmic decompositions. 5172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // They might have different non-zero FCD16 values. 
5182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int prevFCD16=getFCD16(start); 5192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while(++start<=end) { 5202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int fcd16=getFCD16(start); 5212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(fcd16!=prevFCD16) { 5222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller set.add(start); 5232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller prevFCD16=fcd16; 5242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 5292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public void addLcccChars(UnicodeSet set) { 5302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* add the start code point of each same-value range of each trie */ 5312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller Iterator<Trie2.Range> trieIterator=normTrie.iterator(); 5322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller Trie2.Range range; 5332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) { 5342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller enumLcccRange(range.startCodePoint, range.endCodePoint, range.value, set); 5352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 5382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public void addPropertyStarts(UnicodeSet set) { 5392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* add the start code point of each same-value range of each trie */ 5402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller Iterator<Trie2.Range> trieIterator=normTrie.iterator(); 5412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller Trie2.Range range; 
5422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) { 5432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller enumNorm16PropertyStartsRange(range.startCodePoint, range.endCodePoint, range.value, set); 5442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 5462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* add Hangul LV syllables and LV+1 because of skippables */ 5472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for(int c=Hangul.HANGUL_BASE; c<Hangul.HANGUL_LIMIT; c+=Hangul.JAMO_T_COUNT) { 5482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller set.add(c); 5492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller set.add(c+1); 5502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller set.add(Hangul.HANGUL_LIMIT); /* add Hangul+1 to continue with other properties */ 5522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 5542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public void addCanonIterPropertyStarts(UnicodeSet set) { 5552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* add the start code point of each same-value range of the canonical iterator data trie */ 5562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ensureCanonIterData(); 5572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // currently only used for the SEGMENT_STARTER property 5582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller Iterator<Trie2.Range> trieIterator=canonIterData.iterator(segmentStarterMapper); 5592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller Trie2.Range range; 5602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) { 5612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* add the start code point to the USet */ 
5622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller set.add(range.startCodePoint); 5632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static final Trie2.ValueMapper segmentStarterMapper=new Trie2.ValueMapper() { 5662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public int map(int in) { 5672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return in&CANON_NOT_SEGMENT_STARTER; 5682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller }; 5702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 5712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // low-level properties ------------------------------------------------ *** 5722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 5732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public Trie2_16 getNormTrie() { return normTrie; } 5742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 5752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Note: Normalizer2Impl.java r30983 (2011-nov-27) 5762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // still had getFCDTrie() which built and cached an FCD trie. 5772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // That provided faster access to FCD data than getFCD16FromNormData() 5782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // but required synchronization and consumed some 10kB of heap memory 5792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // in any process that uses FCD (e.g., via collation). 5802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // tccc180[] and smallFCD[] are intended to help with any loss of performance, 5812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // at least for Latin & CJK. 
5822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 5832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 5842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Builds the canonical-iterator data for this instance. 5852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * This is required before any of {@link #isCanonSegmentStarter(int)} or 5862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * {@link #getCanonStartSet(int, UnicodeSet)} are called, 5872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * or else they crash. 5882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return this 5892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 5902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public synchronized Normalizer2Impl ensureCanonIterData() { 5912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(canonIterData==null) { 5922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller Trie2Writable newData=new Trie2Writable(0, 0); 5932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller canonStartSets=new ArrayList<UnicodeSet>(); 5942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller Iterator<Trie2.Range> trieIterator=normTrie.iterator(); 5952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller Trie2.Range range; 5962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) { 5972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller final int norm16=range.value; 5982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(norm16==0 || (minYesNo<=norm16 && norm16<minNoNo)) { 5992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Inert, or 2-way mapping (including Hangul syllable). 6002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // We do not write a canonStartSet for any yesNo character. 
6012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Composites from 2-way mappings are added at runtime from the 6022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // starter's compositions list, and the other characters in 6032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 2-way mappings get CANON_NOT_SEGMENT_STARTER set because they are 6042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // "maybe" characters. 6052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller continue; 6062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for(int c=range.startCodePoint; c<=range.endCodePoint; ++c) { 6082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller final int oldValue=newData.get(c); 6092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int newValue=oldValue; 6102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(norm16>=minMaybeYes) { 6112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // not a segment starter if it occurs in a decomposition or has cc!=0 6122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller newValue|=CANON_NOT_SEGMENT_STARTER; 6132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(norm16<MIN_NORMAL_MAYBE_YES) { 6142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller newValue|=CANON_HAS_COMPOSITIONS; 6152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(norm16<minYesNo) { 6172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller newValue|=CANON_HAS_COMPOSITIONS; 6182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 6192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // c has a one-way decomposition 6202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int c2=c; 6212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int norm16_2=norm16; 6222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while(limitNoNo<=norm16_2 && norm16_2<minMaybeYes) { 6232ae130017183d2f66d55bf0ca51f8da3294644fdNeil 
Fuller c2=this.mapAlgorithmic(c2, norm16_2); 6242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller norm16_2=getNorm16(c2); 6252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(minYesNo<=norm16_2 && norm16_2<limitNoNo) { 6272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // c decomposes, get everything from the variable-length extra data 6282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int firstUnit=extraData.charAt(norm16_2); 6292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int length=firstUnit&MAPPING_LENGTH_MASK; 6302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if((firstUnit&MAPPING_HAS_CCC_LCCC_WORD)!=0) { 6312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(c==c2 && (extraData.charAt(norm16_2-1)&0xff)!=0) { 6322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller newValue|=CANON_NOT_SEGMENT_STARTER; // original c has cc!=0 6332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Skip empty mappings (no characters in the decomposition). 6362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(length!=0) { 6372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ++norm16_2; // skip over the firstUnit 6382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // add c to first code point's start set 6392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int limit=norm16_2+length; 6402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c2=extraData.codePointAt(norm16_2); 6412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller addToStartSet(newData, c, c2); 6422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Set CANON_NOT_SEGMENT_STARTER for each remaining code point of a 6432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // one-way mapping. 
A 2-way mapping is possible here after 6442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // intermediate algorithmic mapping. 6452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(norm16_2>=minNoNo) { 6462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while((norm16_2+=Character.charCount(c2))<limit) { 6472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c2=extraData.codePointAt(norm16_2); 6482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int c2Value=newData.get(c2); 6492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if((c2Value&CANON_NOT_SEGMENT_STARTER)==0) { 6502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller newData.set(c2, c2Value|CANON_NOT_SEGMENT_STARTER); 6512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 6562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // c decomposed to c2 algorithmically; c has cc==0 6572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller addToStartSet(newData, c, c2); 6582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(newValue!=oldValue) { 6612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller newData.set(c, newValue); 6622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller canonIterData=newData.toTrie2_32(); 6662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return this; 6682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 
6702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public int getNorm16(int c) { return normTrie.get(c); } 6712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 6722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public int getCompQuickCheck(int norm16) { 6732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(norm16<minNoNo || MIN_YES_YES_WITH_CC<=norm16) { 6742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return 1; // yes 6752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(minMaybeYes<=norm16) { 6762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return 2; // maybe 6772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 6782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return 0; // no 6792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public boolean isAlgorithmicNoNo(int norm16) { return limitNoNo<=norm16 && norm16<minMaybeYes; } 6822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public boolean isCompNo(int norm16) { return minNoNo<=norm16 && norm16<minMaybeYes; } 6832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public boolean isDecompYes(int norm16) { return norm16<minYesNo || minMaybeYes<=norm16; } 6842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 6852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public int getCC(int norm16) { 6862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(norm16>=MIN_NORMAL_MAYBE_YES) { 6872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return norm16&0xff; 6882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(norm16<minNoNo || limitNoNo<=norm16) { 6902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return 0; 6912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return getCCFromNoNo(norm16); 
6932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static int getCCFromYesOrMaybe(int norm16) { 6952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return norm16>=MIN_NORMAL_MAYBE_YES ? norm16&0xff : 0; 6962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 6982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 6992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Returns the FCD data for code point c. 7002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param c A Unicode code point. 7012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return The lccc(c) in bits 15..8 and tccc(c) in bits 7..0. 7022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 7032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public int getFCD16(int c) { 7042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(c<0) { 7052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return 0; 7062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(c<0x180) { 7072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return tccc180[c]; 7082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(c<=0xffff) { 7092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(!singleLeadMightHaveNonZeroFCD16(c)) { return 0; } 7102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return getFCD16FromNormData(c); 7122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** Returns the FCD data for U+0000<=c<U+0180. */ 7142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public int getFCD16FromBelow180(int c) { return tccc180[c]; } 7152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** Returns true if the single-or-lead code unit c might have non-zero FCD data. 
*/ 7162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public boolean singleLeadMightHaveNonZeroFCD16(int lead) { 7172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 0<=lead<=0xffff 7182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller byte bits=smallFCD[lead>>8]; 7192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(bits==0) { return false; } 7202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return ((bits>>((lead>>5)&7))&1)!=0; 7212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 7232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** Gets the FCD value from the regular normalization data. */ 7242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public int getFCD16FromNormData(int c) { 7252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Only loops for 1:1 algorithmic mappings. 7262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for(;;) { 7272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int norm16=getNorm16(c); 7282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(norm16<=minYesNo) { 7292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // no decomposition or Hangul syllable, all zeros 7302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return 0; 7312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(norm16>=MIN_NORMAL_MAYBE_YES) { 7322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // combining mark 7332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller norm16&=0xff; 7342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return norm16|(norm16<<8); 7352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(norm16>=minMaybeYes) { 7362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return 0; 7372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(isDecompNoAlgorithmic(norm16)) { 7382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c=mapAlgorithmic(c, norm16); 7392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 
7402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // c decomposes, get everything from the variable-length extra data 7412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int firstUnit=extraData.charAt(norm16); 7422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if((firstUnit&MAPPING_LENGTH_MASK)==0) { 7432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // A character that is deleted (maps to an empty string) must 7442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // get the worst-case lccc and tccc values because arbitrary 7452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // characters on both sides will become adjacent. 7462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return 0x1ff; 7472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 7482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int fcd16=firstUnit>>8; // tccc 7492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if((firstUnit&MAPPING_HAS_CCC_LCCC_WORD)!=0) { 7502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fcd16|=extraData.charAt(norm16-1)&0xff00; // lccc 7512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return fcd16; 7532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 7582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 7592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Gets the decomposition for one code point. 
7602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param c code point 7612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return c's decomposition, if it has one; returns null if it does not have a decomposition 7622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 7632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public String getDecomposition(int c) { 7642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int decomp=-1; 7652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int norm16; 7662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for(;;) { 7672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(c<minDecompNoCP || isDecompYes(norm16=getNorm16(c))) { 7682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // c does not decompose 7692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(isHangul(norm16)) { 7702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Hangul syllable: decompose algorithmically 7712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller StringBuilder buffer=new StringBuilder(); 7722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller Hangul.decompose(c, buffer); 7732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return buffer.toString(); 7742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(isDecompNoAlgorithmic(norm16)) { 7752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller decomp=c=mapAlgorithmic(c, norm16); 7762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller continue; 7772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 7782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // c decomposes, get everything from the variable-length extra data 7792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int length=extraData.charAt(norm16++)&MAPPING_LENGTH_MASK; 7802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return extraData.substring(norm16, norm16+length); 7812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 
if(decomp<0) { 7832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return null; 7842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 7852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return UTF16.valueOf(decomp); 7862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 7902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 7912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Gets the raw decomposition for one code point. 7922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param c code point 7932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return c's raw decomposition, if it has one; returns null if it does not have a decomposition 7942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 7952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public String getRawDecomposition(int c) { 7962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // We do not loop in this method because an algorithmic mapping itself 7972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // becomes a final result rather than having to be decomposed recursively. 
7982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int norm16; 7992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(c<minDecompNoCP || isDecompYes(norm16=getNorm16(c))) { 8002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // c does not decompose 8012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return null; 8022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(isHangul(norm16)) { 8032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Hangul syllable: decompose algorithmically 8042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller StringBuilder buffer=new StringBuilder(); 8052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller Hangul.getRawDecomposition(c, buffer); 8062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return buffer.toString(); 8072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(isDecompNoAlgorithmic(norm16)) { 8082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return UTF16.valueOf(mapAlgorithmic(c, norm16)); 8092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 8102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // c decomposes, get everything from the variable-length extra data 8112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int firstUnit=extraData.charAt(norm16); 8122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int mLength=firstUnit&MAPPING_LENGTH_MASK; // length of normal mapping 8132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if((firstUnit&MAPPING_HAS_RAW_MAPPING)!=0) { 8142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Read the raw mapping from before the firstUnit and before the optional ccc/lccc word. 
8152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Bit 7=MAPPING_HAS_CCC_LCCC_WORD 8162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int rawMapping=norm16-((firstUnit>>7)&1)-1; 8172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller char rm0=extraData.charAt(rawMapping); 8182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(rm0<=MAPPING_LENGTH_MASK) { 8192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return extraData.substring(rawMapping-rm0, rawMapping); 8202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 8212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Copy the normal mapping and replace its first two code units with rm0. 8222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller StringBuilder buffer=new StringBuilder(mLength-1).append(rm0); 8232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller norm16+=1+2; // skip over the firstUnit and the first two mapping code units 8242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return buffer.append(extraData, norm16, norm16+mLength-2).toString(); 8252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 8272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller norm16+=1; // skip over the firstUnit 8282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return extraData.substring(norm16, norm16+mLength); 8292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 8332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 8342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Returns true if code point c starts a canonical-iterator string segment. 
8352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <b>{@link #ensureCanonIterData()} must have been called before this method, 8362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * or else this method will crash.</b> 8372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param c A Unicode code point. 8382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return true if c starts a canonical-iterator string segment. 8392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 8402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public boolean isCanonSegmentStarter(int c) { 8412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return canonIterData.get(c)>=0; 8422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 8442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Returns true if there are characters whose decomposition starts with c. 8452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * If so, then the set is cleared and then filled with those characters. 8462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <b>{@link #ensureCanonIterData()} must have been called before this method, 8472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * or else this method will crash.</b> 8482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param c A Unicode code point. 8492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param set A UnicodeSet to receive the characters whose decompositions 8502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * start with c, if there are any. 8512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return true if there are characters whose decomposition starts with c. 
8522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 8532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public boolean getCanonStartSet(int c, UnicodeSet set) { 8542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int canonValue=canonIterData.get(c)&~CANON_NOT_SEGMENT_STARTER; 8552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(canonValue==0) { 8562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return false; 8572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller set.clear(); 8592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int value=canonValue&CANON_VALUE_MASK; 8602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if((canonValue&CANON_HAS_SET)!=0) { 8612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller set.addAll(canonStartSets.get(value)); 8622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(value!=0) { 8632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller set.add(value); 8642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if((canonValue&CANON_HAS_COMPOSITIONS)!=0) { 8662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int norm16=getNorm16(c); 8672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(norm16==JAMO_L) { 8682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int syllable=Hangul.HANGUL_BASE+(c-Hangul.JAMO_L_BASE)*Hangul.JAMO_VT_COUNT; 8692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller set.add(syllable, syllable+Hangul.JAMO_VT_COUNT-1); 8702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 8712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller addComposites(getCompositionsList(norm16), set); 8722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return true; 8752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 
8772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static final int MIN_CCC_LCCC_CP=0x300; 8782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 8792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static final int MIN_YES_YES_WITH_CC=0xff01; 8802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static final int JAMO_VT=0xff00; 8812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static final int MIN_NORMAL_MAYBE_YES=0xfe00; 8822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static final int JAMO_L=1; 8832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static final int MAX_DELTA=0x40; 8842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 8852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Byte offsets from the start of the data, after the generic header. 8862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static final int IX_NORM_TRIE_OFFSET=0; 8872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static final int IX_EXTRA_DATA_OFFSET=1; 8882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static final int IX_SMALL_FCD_OFFSET=2; 8892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static final int IX_RESERVED3_OFFSET=3; 8902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static final int IX_TOTAL_SIZE=7; 8912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 8922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Code point thresholds for quick check codes. 8932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static final int IX_MIN_DECOMP_NO_CP=8; 8942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static final int IX_MIN_COMP_NO_MAYBE_CP=9; 8952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 8962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Norm16 value thresholds for quick check combinations and types of extra data. 
8972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Mappings & compositions in [minYesNo..minYesNoMappingsOnly[. 8982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static final int IX_MIN_YES_NO=10; 8992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static final int IX_MIN_NO_NO=11; 9002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static final int IX_LIMIT_NO_NO=12; 9012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static final int IX_MIN_MAYBE_YES=13; 9022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 9032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Mappings only in [minYesNoMappingsOnly..minNoNo[. 9042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static final int IX_MIN_YES_NO_MAPPINGS_ONLY=14; 9052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 9062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static final int IX_COUNT=16; 9072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 9082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static final int MAPPING_HAS_CCC_LCCC_WORD=0x80; 9092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static final int MAPPING_HAS_RAW_MAPPING=0x40; 9102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static final int MAPPING_NO_COMP_BOUNDARY_AFTER=0x20; 9112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static final int MAPPING_LENGTH_MASK=0x1f; 9122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 9132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static final int COMP_1_LAST_TUPLE=0x8000; 9142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static final int COMP_1_TRIPLE=1; 9152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static final int COMP_1_TRAIL_LIMIT=0x3400; 9162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static final int COMP_1_TRAIL_MASK=0x7ffe; 9172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static final int COMP_1_TRAIL_SHIFT=9; // 
10-1 for the "triple" bit 9182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static final int COMP_2_TRAIL_SHIFT=6; 9192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static final int COMP_2_TRAIL_MASK=0xffc0; 9202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 9212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // higher-level functionality ------------------------------------------ *** 9222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 9232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // NFD without an NFD Normalizer2 instance. 9242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public Appendable decompose(CharSequence s, StringBuilder dest) { 9252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller decompose(s, 0, s.length(), dest, s.length()); 9262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return dest; 9272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 9292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Decomposes s[src, limit[ and writes the result to dest. 9302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * limit can be NULL if src is NUL-terminated. 9312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * destLengthEstimate is the initial dest buffer capacity and can be -1. 
9322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 9332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public void decompose(CharSequence s, int src, int limit, StringBuilder dest, 9342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int destLengthEstimate) { 9352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(destLengthEstimate<0) { 9362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller destLengthEstimate=limit-src; 9372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller dest.setLength(0); 9392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ReorderingBuffer buffer=new ReorderingBuffer(this, dest, destLengthEstimate); 9402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller decompose(s, src, limit, buffer); 9412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 9432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Dual functionality: 9442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // buffer!=NULL: normalize 9452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // buffer==NULL: isNormalized/quickCheck/spanQuickCheckYes 9462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public int decompose(CharSequence s, int src, int limit, 9472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ReorderingBuffer buffer) { 9482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int minNoCP=minDecompNoCP; 9492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 9502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int prevSrc; 9512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int c=0; 9522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int norm16=0; 9532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 9542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // only for quick check 9552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int prevBoundary=src; 9562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int prevCC=0; 
9572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 9582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for(;;) { 9592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // count code units below the minimum or with irrelevant data for the quick check 9602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for(prevSrc=src; src!=limit;) { 9612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if( (c=s.charAt(src))<minNoCP || 9622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller isMostDecompYesAndZeroCC(norm16=normTrie.getFromU16SingleLead((char)c)) 9632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ) { 9642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ++src; 9652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(!UTF16.isSurrogate((char)c)) { 9662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 9672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 9682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller char c2; 9692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(UTF16Plus.isSurrogateLead(c)) { 9702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if((src+1)!=limit && Character.isLowSurrogate(c2=s.charAt(src+1))) { 9712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c=Character.toCodePoint((char)c, c2); 9722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else /* trail surrogate */ { 9742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(prevSrc<src && Character.isHighSurrogate(c2=s.charAt(src-1))) { 9752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller --src; 9762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c=Character.toCodePoint(c2, (char)c); 9772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(isMostDecompYesAndZeroCC(norm16=getNorm16(c))) { 9802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller src+=Character.charCount(c); 
9812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 9822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 9832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // copy these code units all at once 9872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(src!=prevSrc) { 9882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(buffer!=null) { 9892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer.flushAndAppendZeroCC(s, prevSrc, src); 9902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 9912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller prevCC=0; 9922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller prevBoundary=src; 9932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(src==limit) { 9962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 9972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 9992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Check one above-minimum, relevant code point. 
10002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller src+=Character.charCount(c); 10012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(buffer!=null) { 10022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller decompose(c, norm16, buffer); 10032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 10042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(isDecompYes(norm16)) { 10052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int cc=getCCFromYesOrMaybe(norm16); 10062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(prevCC<=cc || cc==0) { 10072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller prevCC=cc; 10082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(cc<=1) { 10092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller prevBoundary=src; 10102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller continue; 10122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return prevBoundary; // "no" or cc out of order 10152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return src; 10182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public void decomposeAndAppend(CharSequence s, boolean doDecompose, ReorderingBuffer buffer) { 10202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int limit=s.length(); 10212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(limit==0) { 10222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return; 10232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(doDecompose) { 10252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller decompose(s, 0, limit, buffer); 10262ae130017183d2f66d55bf0ca51f8da3294644fdNeil 
Fuller return; 10272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Just merge the strings at the boundary. 10292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int c=Character.codePointAt(s, 0); 10302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int src=0; 10312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int firstCC, prevCC, cc; 10322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller firstCC=prevCC=cc=getCC(getNorm16(c)); 10332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while(cc!=0) { 10342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller prevCC=cc; 10352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller src+=Character.charCount(c); 10362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(src>=limit) { 10372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 10382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c=Character.codePointAt(s, src); 10402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller cc=getCC(getNorm16(c)); 10412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller }; 10422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer.append(s, 0, src, firstCC, prevCC); 10432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer.append(s, src, limit); 10442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Very similar to composeQuickCheck(): Make the same changes in both places if relevant. 
10462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // doCompose: normalize 10472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // !doCompose: isNormalized (buffer must be empty and initialized) 10482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public boolean compose(CharSequence s, int src, int limit, 10492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller boolean onlyContiguous, 10502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller boolean doCompose, 10512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ReorderingBuffer buffer) { 10522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int minNoMaybeCP=minCompNoMaybeCP; 10532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 10542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* 10552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * prevBoundary points to the last character before the current one 10562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * that has a composition boundary before it with ccc==0 and quick check "yes". 10572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Keeping track of prevBoundary saves us looking for a composition boundary 10582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * when we find a "no" or "maybe". 10592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 10602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * When we back out from prevSrc back to prevBoundary, 10612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * then we also remove those same characters (which had been simply copied 10622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * or canonically-order-inserted) from the ReorderingBuffer. 10632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Therefore, at all times, the [prevBoundary..prevSrc[ source units 10642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * must correspond 1:1 to destination units at the end of the destination buffer. 
10652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 10662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int prevBoundary=src; 10672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int prevSrc; 10682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int c=0; 10692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int norm16=0; 10702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 10712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // only for isNormalized 10722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int prevCC=0; 10732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 10742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for(;;) { 10752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // count code units below the minimum or with irrelevant data for the quick check 10762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for(prevSrc=src; src!=limit;) { 10772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if( (c=s.charAt(src))<minNoMaybeCP || 10782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller isCompYesAndZeroCC(norm16=normTrie.getFromU16SingleLead((char)c)) 10792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ) { 10802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ++src; 10812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(!UTF16.isSurrogate((char)c)) { 10822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 10832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 10842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller char c2; 10852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(UTF16Plus.isSurrogateLead(c)) { 10862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if((src+1)!=limit && Character.isLowSurrogate(c2=s.charAt(src+1))) { 10872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c=Character.toCodePoint((char)c, c2); 10882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else /* trail surrogate */ { 
10902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(prevSrc<src && Character.isHighSurrogate(c2=s.charAt(src-1))) { 10912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller --src; 10922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c=Character.toCodePoint(c2, (char)c); 10932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(isCompYesAndZeroCC(norm16=getNorm16(c))) { 10962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller src+=Character.charCount(c); 10972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 10982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 10992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 11002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 11012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 11022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // copy these code units all at once 11032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(src!=prevSrc) { 11042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(src==limit) { 11052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(doCompose) { 11062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer.flushAndAppendZeroCC(s, prevSrc, src); 11072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 11082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 11092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 11102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Set prevBoundary to the last character in the quick check loop. 
11112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller prevBoundary=src-1; 11122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if( Character.isLowSurrogate(s.charAt(prevBoundary)) && prevSrc<prevBoundary && 11132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller Character.isHighSurrogate(s.charAt(prevBoundary-1)) 11142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ) { 11152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller --prevBoundary; 11162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 11172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(doCompose) { 11182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // The last "quick check yes" character is excluded from the 11192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // flush-and-append call in case it needs to be modified. 11202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer.flushAndAppendZeroCC(s, prevSrc, prevBoundary); 11212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer.append(s, prevBoundary, src); 11222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 11232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller prevCC=0; 11242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 11252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // The start of the current character (c). 11262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller prevSrc=src; 11272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(src==limit) { 11282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 11292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 11302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 11312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller src+=Character.charCount(c); 11322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* 11332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * isCompYesAndZeroCC(norm16) is false, that is, norm16>=minNoNo. 
11342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * c is either a "noNo" (has a mapping) or a "maybeYes" (combines backward) 11352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * or has ccc!=0. 11362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Check for Jamo V/T, then for regular characters. 11372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * c is not a Hangul syllable or Jamo L because those have "yes" properties. 11382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 11392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(isJamoVT(norm16) && prevBoundary!=prevSrc) { 11402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller char prev=s.charAt(prevSrc-1); 11412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller boolean needToDecompose=false; 11422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(c<Hangul.JAMO_T_BASE) { 11432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // c is a Jamo Vowel, compose with previous Jamo L and following Jamo T. 11442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller prev-=Hangul.JAMO_L_BASE; 11452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(prev<Hangul.JAMO_L_COUNT) { 11462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(!doCompose) { 11472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return false; 11482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 11492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller char syllable=(char) 11502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller (Hangul.HANGUL_BASE+ 11512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller (prev*Hangul.JAMO_V_COUNT+(c-Hangul.JAMO_V_BASE))* 11522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller Hangul.JAMO_T_COUNT); 11532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller char t; 11542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(src!=limit && (t=(char)(s.charAt(src)-Hangul.JAMO_T_BASE))<Hangul.JAMO_T_COUNT) { 11552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ++src; 
11562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller syllable+=t; // The next character was a Jamo T. 11572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller prevBoundary=src; 11582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer.setLastChar(syllable); 11592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller continue; 11602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 11612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // If we see L+V+x where x!=T then we drop to the slow path, 11622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // decompose and recompose. 11632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // This is to deal with NFKC finding normal L and V but a 11642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // compatibility variant of a T. We need to either fully compose that 11652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // combination here (which would complicate the code and may not work 11662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // with strange custom data) or use the slow path -- or else our replacing 11672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // two input characters (L+V) with one output character (LV syllable) 11682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // would violate the invariant that [prevBoundary..prevSrc[ has the same 11692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // length as what we appended to the buffer since prevBoundary. 11702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller needToDecompose=true; 11712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 11722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(Hangul.isHangulWithoutJamoT(prev)) { 11732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // c is a Jamo Trailing consonant, 11742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // compose with previous Hangul LV that does not contain a Jamo T. 
11752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(!doCompose) { 11762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return false; 11772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 11782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer.setLastChar((char)(prev+c-Hangul.JAMO_T_BASE)); 11792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller prevBoundary=src; 11802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller continue; 11812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 11822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(!needToDecompose) { 11832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // The Jamo V/T did not compose into a Hangul syllable. 11842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(doCompose) { 11852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer.append((char)c); 11862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 11872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller prevCC=0; 11882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 11892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller continue; 11902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 11912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 11922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* 11932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Source buffer pointers: 11942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 11952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * all done quick check current char not yet 11962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * "yes" but (c) processed 11972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * may combine 11982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * forward 11992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * [-------------[-------------[-------------[-------------[ 12002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * | | | | | 12012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * orig. 
src prevBoundary prevSrc src limit 12022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 12032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 12042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Destination buffer pointers inside the ReorderingBuffer: 12052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 12062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * all done might take not filled yet 12072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * characters for 12082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * reordering 12092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * [-------------[-------------[-------------[ 12102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * | | | | 12112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * start reorderStart limit | 12122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * +remainingCap.+ 12132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 12142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(norm16>=MIN_YES_YES_WITH_CC) { 12152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int cc=norm16&0xff; // cc!=0 12162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if( onlyContiguous && // FCC 12172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller (doCompose ? buffer.getLastCC() : prevCC)==0 && 12182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller prevBoundary<prevSrc && 12192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // buffer.getLastCC()==0 && prevBoundary<prevSrc tell us that 12202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // [prevBoundary..prevSrc[ (which is exactly one character under these conditions) 12212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // passed the quick check "yes && ccc==0" test. 12222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Check whether the last character was a "yesYes" or a "yesNo". 
12232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // If a "yesNo", then we get its trailing ccc from its 12242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // mapping and check for canonical order. 12252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // All other cases are ok. 12262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller getTrailCCFromCompYesAndZeroCC(s, prevBoundary, prevSrc)>cc 12272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ) { 12282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Fails FCD test, need to decompose and contiguously recompose. 12292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(!doCompose) { 12302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return false; 12312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 12322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(doCompose) { 12332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer.append(c, cc); 12342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller continue; 12352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(prevCC<=cc) { 12362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller prevCC=cc; 12372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller continue; 12382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 12392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return false; 12402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 12412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(!doCompose && !isMaybeOrNonZeroCC(norm16)) { 12422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return false; 12432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 12442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 12452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* 12462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Find appropriate boundaries around this character, 12472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * decompose the source text from between the boundaries, 
12482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * and recompose it. 12492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 12502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * We may need to remove the last few characters from the ReorderingBuffer 12512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * to account for source text that was copied or appended 12522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * but needs to take part in the recomposition. 12532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 12542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 12552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* 12562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Find the last composition boundary in [prevBoundary..src[. 12572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * It is either the decomposition of the current character (at prevSrc), 12582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * or prevBoundary. 12592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 12602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(hasCompBoundaryBefore(c, norm16)) { 12612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller prevBoundary=prevSrc; 12622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(doCompose) { 12632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer.removeSuffix(prevSrc-prevBoundary); 12642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 12652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 12662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Find the next composition boundary in [src..limit[ - 12672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // modifies src to point to the next starter. 
12682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller src=findNextCompBoundary(s, src, limit); 12692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 12702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Decompose [prevBoundary..src[ into the buffer and then recompose that part of it. 12712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int recomposeStartIndex=buffer.length(); 12722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller decomposeShort(s, prevBoundary, src, buffer); 12732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller recompose(buffer, recomposeStartIndex, onlyContiguous); 12742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(!doCompose) { 12752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(!buffer.equals(s, prevBoundary, src)) { 12762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return false; 12772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 12782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer.remove(); 12792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller prevCC=0; 12802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 12812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 12822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Move to the next starter. We never need to look back before this point again. 12832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller prevBoundary=src; 12842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 12852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return true; 12862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 12872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 12882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Very similar to compose(): Make the same changes in both places if relevant. 
* doSpan: spanQuickCheckYes (ignore bit 0 of the return value)
     * !doSpan: quickCheck
     *
     * @param s the text to check
     * @param src index of the first code unit to check
     * @param limit index at which checking stops; the scan ends when src==limit
     * @param onlyContiguous if true, additionally applies the FCC check: a character with
     *        ccc!=0 must not have a lower ccc than the trailing ccc of the character before it
     * @param doSpan if true, a "maybe" character ends the span immediately instead of
     *        setting the "maybe" bit
     * @return bits 31..1: spanQuickCheckYes (==s.length() if "yes") and
     *         bit 0: set if "maybe"; otherwise, if the span length<s.length()
     *         then the quick check result is "no"
     */
    public int composeQuickCheck(CharSequence s, int src, int limit,
                                 boolean onlyContiguous, boolean doSpan) {
        int qcResult=0;
        int minNoMaybeCP=minCompNoMaybeCP;

        /*
         * prevBoundary points to the last character before the current one
         * that has a composition boundary before it with ccc==0 and quick check "yes".
         */
        int prevBoundary=src;
        int prevSrc;
        int c=0;
        int norm16=0;
        int prevCC=0;

        for(;;) {
            // count code units below the minimum or with irrelevant data for the quick check
            for(prevSrc=src;;) {
                if(src==limit) {
                    return (src<<1)|qcResult;  // "yes" or "maybe"
                }
                if( (c=s.charAt(src))<minNoMaybeCP ||
                    isCompYesAndZeroCC(norm16=normTrie.getFromU16SingleLead((char)c))
                ) {
                    ++src;
                } else if(!UTF16.isSurrogate((char)c)) {
                    break;
                } else {
                    // c is a surrogate code unit: try to assemble the supplementary
                    // code point from its neighbor before looking up its data.
                    char c2;
                    if(UTF16Plus.isSurrogateLead(c)) {
                        if((src+1)!=limit && Character.isLowSurrogate(c2=s.charAt(src+1))) {
                            c=Character.toCodePoint((char)c, c2);
                        }
                    } else /* trail surrogate */ {
                        if(prevSrc<src && Character.isHighSurrogate(c2=s.charAt(src-1))) {
                            // Step back so that src points to the start of the pair.
                            --src;
                            c=Character.toCodePoint(c2, (char)c);
                        }
                    }
                    if(isCompYesAndZeroCC(norm16=getNorm16(c))) {
                        src+=Character.charCount(c);
                    } else {
                        break;
                    }
                }
            }
            if(src!=prevSrc) {
                // Set prevBoundary to the last character in the quick check loop.
                prevBoundary=src-1;
                if( Character.isLowSurrogate(s.charAt(prevBoundary)) && prevSrc<prevBoundary &&
                        Character.isHighSurrogate(s.charAt(prevBoundary-1))
                ) {
                    --prevBoundary;
                }
                prevCC=0;
                // The start of the current character (c).
                prevSrc=src;
            }

            src+=Character.charCount(c);
            /*
             * isCompYesAndZeroCC(norm16) is false, that is, norm16>=minNoNo.
             * c is either a "noNo" (has a mapping) or a "maybeYes" (combines backward)
             * or has ccc!=0.
             */
            if(isMaybeOrNonZeroCC(norm16)) {
                int cc=getCCFromYesOrMaybe(norm16);
                if( onlyContiguous &&  // FCC
                    cc!=0 &&
                    prevCC==0 &&
                    prevBoundary<prevSrc &&
                    // prevCC==0 && prevBoundary<prevSrc tell us that
                    // [prevBoundary..prevSrc[ (which is exactly one character under these conditions)
                    // passed the quick check "yes && ccc==0" test.
                    // Check whether the last character was a "yesYes" or a "yesNo".
                    // If a "yesNo", then we get its trailing ccc from its
                    // mapping and check for canonical order.
                    // All other cases are ok.
                    getTrailCCFromCompYesAndZeroCC(s, prevBoundary, prevSrc)>cc
                ) {
                    // Fails FCD test.
                    // Falls through to the "no" return at the bottom of the loop.
                } else if(prevCC<=cc || cc==0) {
                    prevCC=cc;
                    if(norm16<MIN_YES_YES_WITH_CC) {
                        if(!doSpan) {
                            qcResult=1;
                        } else {
                            return prevBoundary<<1;  // spanYes does not care to know it's "maybe"
                        }
                    }
                    continue;
                }
            }
            return prevBoundary<<1;  // "no"
        }
    }

    /**
     * Appends s to the buffer, recomposing the text around the seam between the
     * existing buffer contents and s.
     *
     * If the buffer is not empty and s does not begin at a composition boundary
     * (findNextCompBoundary(s, 0, limit)!=0), then the buffer's suffix starting at
     * its last composition boundary is removed from the buffer, joined with the
     * prefix of s up to its first composition boundary, and that "middle" piece is
     * composed into the buffer. This seam handling always composes, regardless of doCompose.
     *
     * @param s the text to append
     * @param doCompose if true, the rest of s is passed through compose();
     *        if false, it is appended to the buffer unchanged
     * @param onlyContiguous passed through to compose()
     * @param buffer the destination; modified in place
     */
    public void composeAndAppend(CharSequence s,
                                 boolean doCompose,
                                 boolean onlyContiguous,
                                 ReorderingBuffer buffer) {
        int src=0, limit=s.length();
        if(!buffer.isEmpty()) {
            int firstStarterInSrc=findNextCompBoundary(s, 0, limit);
            if(0!=firstStarterInSrc) {
                int lastStarterInDest=findPreviousCompBoundary(buffer.getStringBuilder(),
                                                               buffer.length());
                // Capacity: buffer suffix + s prefix + 16 extra chars of headroom.
                StringBuilder middle=new StringBuilder((buffer.length()-lastStarterInDest)+
                                                       firstStarterInSrc+16);
                middle.append(buffer.getStringBuilder(), lastStarterInDest, buffer.length());
                buffer.removeSuffix(buffer.length()-lastStarterInDest);
                middle.append(s, 0, firstStarterInSrc);
                compose(middle, 0, middle.length(), onlyContiguous, true, buffer);
                src=firstStarterInSrc;
            }
        }
        if(doCompose) {
            compose(s, src, limit, onlyContiguous, true, buffer);
        } else {
            buffer.append(s, src, limit);
        }
    }

    /**
     * Dual functionality:
     * buffer!=null: normalize
     * buffer==null: isNormalized/quickCheck/spanQuickCheckYes
     *
     * @param s the text to process
     * @param src index of the first code unit to process
     * @param limit index at which processing stops
     * @param buffer the destination for the FCD text, or null to only check s
     * @return if buffer==null and a character is found out of order (previous tccc
     *         greater than its lccc): the last FCD-safe boundary before it (quick check "no");
     *         otherwise the value of src when the loop ends at limit
     */
    public int makeFCD(CharSequence s, int src, int limit, ReorderingBuffer buffer) {
        // Note: In this function we use buffer->appendZeroCC() because we track
        // the lead and trail combining classes here, rather than leaving it to
        // the ReorderingBuffer.
        // The exception is the call to decomposeShort() which uses the buffer
        // in the normal way.

        // Tracks the last FCD-safe boundary, before lccc=0 or after properly-ordered tccc<=1.
        // Similar to the prevBoundary in the compose() implementation.
        int prevBoundary=src;
        int prevSrc;
        int c=0;
        int prevFCD16=0;
        int fcd16=0;

        for(;;) {
            // count code units with lccc==0
            for(prevSrc=src; src!=limit;) {
                if((c=s.charAt(src))<MIN_CCC_LCCC_CP) {
                    // Defer the data lookup: remember the code point as a negative value.
                    prevFCD16=~c;
                    ++src;
                } else if(!singleLeadMightHaveNonZeroFCD16(c)) {
                    prevFCD16=0;
                    ++src;
                } else {
                    if(UTF16.isSurrogate((char)c)) {
                        char c2;
                        if(UTF16Plus.isSurrogateLead(c)) {
                            if((src+1)!=limit && Character.isLowSurrogate(c2=s.charAt(src+1))) {
                                c=Character.toCodePoint((char)c, c2);
                            }
                        } else /* trail surrogate */ {
                            if(prevSrc<src && Character.isHighSurrogate(c2=s.charAt(src-1))) {
                                // Step back so that src points to the start of the pair.
                                --src;
                                c=Character.toCodePoint(c2, (char)c);
                            }
                        }
                    }
                    // fcd16<=0xff means that the upper byte (lccc) is zero.
                    if((fcd16=getFCD16FromNormData(c))<=0xff) {
                        prevFCD16=fcd16;
                        src+=Character.charCount(c);
                    } else {
                        break;
                    }
                }
            }
            // copy these code units all at once
            if(src!=prevSrc) {
                if(src==limit) {
                    if(buffer!=null) {
                        buffer.flushAndAppendZeroCC(s, prevSrc, src);
                    }
                    break;
                }
                prevBoundary=src;
                // We know that the previous character's lccc==0.
                if(prevFCD16<0) {
                    // Fetching the fcd16 value was deferred for this below-U+0300 code point.
                    int prev=~prevFCD16;
                    prevFCD16= prev<0x180 ? tccc180[prev] : getFCD16FromNormData(prev);
                    if(prevFCD16>1) {
                        --prevBoundary;
                    }
                } else {
                    int p=src-1;
                    if( Character.isLowSurrogate(s.charAt(p)) && prevSrc<p &&
                        Character.isHighSurrogate(s.charAt(p-1))
                    ) {
                        --p;
                        // Need to fetch the previous character's FCD value because
                        // prevFCD16 was just for the trail surrogate code point.
                        prevFCD16=getFCD16FromNormData(Character.toCodePoint(s.charAt(p), s.charAt(p+1)));
                        // Still known to have lccc==0 because its lead surrogate unit had lccc==0.
                    }
                    if(prevFCD16>1) {
                        prevBoundary=p;
                    }
                }
                if(buffer!=null) {
                    // The last lccc==0 character is excluded from the
                    // flush-and-append call in case it needs to be modified.
                    buffer.flushAndAppendZeroCC(s, prevSrc, prevBoundary);
                    buffer.append(s, prevBoundary, src);
                }
                // The start of the current character (c).
                prevSrc=src;
            } else if(src==limit) {
                break;
            }

            src+=Character.charCount(c);
            // The current character (c) at [prevSrc..src[ has a non-zero lead combining class.
            // Check for proper order, and decompose locally if necessary.
            if((prevFCD16&0xff)<=(fcd16>>8)) {
                // proper order: prev tccc <= current lccc
                if((fcd16&0xff)<=1) {
                    prevBoundary=src;
                }
                if(buffer!=null) {
                    buffer.appendZeroCC(c);
                }
                prevFCD16=fcd16;
                continue;
            } else if(buffer==null) {
                return prevBoundary;  // quick check "no"
            } else {
                /*
                 * Back out the part of the source that we copied or appended
                 * already but is now going to be decomposed.
                 * prevSrc is set to after what was copied/appended.
                 */
                buffer.removeSuffix(prevSrc-prevBoundary);
                /*
                 * Find the part of the source that needs to be decomposed,
                 * up to the next safe boundary.
                 */
                src=findNextFCDBoundary(s, src, limit);
                /*
                 * The source text does not fulfill the conditions for FCD.
                 * Decompose and reorder a limited piece of the text.
                 */
                decomposeShort(s, prevBoundary, src, buffer);
                prevBoundary=src;
                prevFCD16=0;
            }
        }
        return src;
    }

    /**
     * Appends s to the buffer, re-running makeFCD() on the text around the seam
     * between the existing buffer contents and s.
     *
     * If the buffer is not empty and s does not begin at an FCD boundary
     * (findNextFCDBoundary(s, 0, limit)!=0), then the buffer's suffix starting at
     * its last FCD boundary is removed from the buffer, joined with the prefix of s
     * up to its first FCD boundary, and that "middle" piece is passed through
     * makeFCD() into the buffer. This seam handling happens regardless of doMakeFCD.
     *
     * @param s the text to append
     * @param doMakeFCD if true, the rest of s is passed through makeFCD();
     *        if false, it is appended to the buffer unchanged
     * @param buffer the destination; modified in place
     */
    public void makeFCDAndAppend(CharSequence s, boolean doMakeFCD, ReorderingBuffer buffer) {
        int src=0, limit=s.length();
        if(!buffer.isEmpty()) {
            int firstBoundaryInSrc=findNextFCDBoundary(s, 0, limit);
            if(0!=firstBoundaryInSrc) {
                int lastBoundaryInDest=findPreviousFCDBoundary(buffer.getStringBuilder(),
                                                               buffer.length());
                // Capacity: buffer suffix + s prefix + 16 extra chars of headroom.
                StringBuilder middle=new StringBuilder((buffer.length()-lastBoundaryInDest)+
                                                       firstBoundaryInSrc+16);
                middle.append(buffer.getStringBuilder(), lastBoundaryInDest, buffer.length());
                buffer.removeSuffix(buffer.length()-lastBoundaryInDest);
                middle.append(s, 0, firstBoundaryInSrc);
                makeFCD(middle, 0, middle.length(), buffer);
                src=firstBoundaryInSrc;
            }
        }
        if(doMakeFCD) {
            makeFCD(s, src, limit, buffer);
        } else {
            buffer.append(s, src, limit);
        }
    }

    // Note: hasDecompBoundary() could be implemented as aliases to
    // hasFCDBoundaryBefore() and hasFCDBoundaryAfter()
    // at the cost of building the FCD trie for a decomposition normalizer.
15732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public boolean hasDecompBoundary(int c, boolean before) { 15742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for(;;) { 15752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(c<minDecompNoCP) { 15762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return true; 15772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 15782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int norm16=getNorm16(c); 15792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(isHangul(norm16) || isDecompYesAndZeroCC(norm16)) { 15802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return true; 15812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(norm16>MIN_NORMAL_MAYBE_YES) { 15822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return false; // ccc!=0 15832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(isDecompNoAlgorithmic(norm16)) { 15842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c=mapAlgorithmic(c, norm16); 15852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 15862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // c decomposes, get everything from the variable-length extra data 15872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int firstUnit=extraData.charAt(norm16); 15882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if((firstUnit&MAPPING_LENGTH_MASK)==0) { 15892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return false; 15902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 15912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(!before) { 15922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // decomp after-boundary: same as hasFCDBoundaryAfter(), 15932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // fcd16<=1 || trailCC==0 15942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(firstUnit>0x1ff) { 15952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return false; // trailCC>1 15962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 
} 15972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(firstUnit<=0xff) { 15982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return true; // trailCC==0 15992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 16002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // if(trailCC==1) test leadCC==0, same as checking for before-boundary 16012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 16022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // true if leadCC==0 (hasFCDBoundaryBefore()) 16032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return (firstUnit&MAPPING_HAS_CCC_LCCC_WORD)==0 || (extraData.charAt(norm16-1)&0xff00)==0; 16042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 16052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 16062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 16072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public boolean isDecompInert(int c) { return isDecompYesAndZeroCC(getNorm16(c)); } 16082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 16092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public boolean hasCompBoundaryBefore(int c) { 16102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return c<minCompNoMaybeCP || hasCompBoundaryBefore(c, getNorm16(c)); 16112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 16122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public boolean hasCompBoundaryAfter(int c, boolean onlyContiguous, boolean testInert) { 16132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for(;;) { 16142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int norm16=getNorm16(c); 16152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(isInert(norm16)) { 16162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return true; 16172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(norm16<=minYesNo) { 16182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Hangul: norm16==minYesNo 16192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Hangul LVT has a 
boundary after it. 16202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Hangul LV and non-inert yesYes characters combine forward. 16212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return isHangul(norm16) && !Hangul.isHangulWithoutJamoT((char)c); 16222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(norm16>= (testInert ? minNoNo : minMaybeYes)) { 16232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return false; 16242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(isDecompNoAlgorithmic(norm16)) { 16252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c=mapAlgorithmic(c, norm16); 16262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 16272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // c decomposes, get everything from the variable-length extra data. 16282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // If testInert, then c must be a yesNo character which has lccc=0, 16292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // otherwise it could be a noNo. 
16302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int firstUnit=extraData.charAt(norm16); 16312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // true if 16322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // not MAPPING_NO_COMP_BOUNDARY_AFTER 16332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // (which is set if 16342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // c is not deleted, and 16352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // it and its decomposition do not combine forward, and it has a starter) 16362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // and if FCC then trailCC<=1 16372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return 16382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller (firstUnit&MAPPING_NO_COMP_BOUNDARY_AFTER)==0 && 16392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller (!onlyContiguous || firstUnit<=0x1ff); 16402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 16412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 16422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 16432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 16442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public boolean hasFCDBoundaryBefore(int c) { return c<MIN_CCC_LCCC_CP || getFCD16(c)<=0xff; } 16452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public boolean hasFCDBoundaryAfter(int c) { 16462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int fcd16=getFCD16(c); 16472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return fcd16<=1 || (fcd16&0xff)==0; 16482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 16492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public boolean isFCDInert(int c) { return getFCD16(c)<=1; } 16502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 16512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private boolean isMaybe(int norm16) { return minMaybeYes<=norm16 && norm16<=JAMO_VT; } 16522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private boolean 
isMaybeOrNonZeroCC(int norm16) { return norm16>=minMaybeYes; } 16532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static boolean isInert(int norm16) { return norm16==0; } 16542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static boolean isJamoL(int norm16) { return norm16==1; } 16552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static boolean isJamoVT(int norm16) { return norm16==JAMO_VT; } 16562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private boolean isHangul(int norm16) { return norm16==minYesNo; } 16572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private boolean isCompYesAndZeroCC(int norm16) { return norm16<minNoNo; } 16582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // UBool isCompYes(uint16_t norm16) const { 16592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // return norm16>=MIN_YES_YES_WITH_CC || norm16<minNoNo; 16602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // } 16612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // UBool isCompYesOrMaybe(uint16_t norm16) const { 16622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // return norm16<minNoNo || minMaybeYes<=norm16; 16632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // } 16642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // private boolean hasZeroCCFromDecompYes(int norm16) { 16652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // return norm16<=MIN_NORMAL_MAYBE_YES || norm16==JAMO_VT; 16662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // } 16672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private boolean isDecompYesAndZeroCC(int norm16) { 16682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return norm16<minYesNo || 16692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller norm16==JAMO_VT || 16702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller (minMaybeYes<=norm16 && norm16<=MIN_NORMAL_MAYBE_YES); 16712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 
16722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 16732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * A little faster and simpler than isDecompYesAndZeroCC() but does not include 16742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * the MaybeYes which combine-forward and have ccc=0. 16752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * (Standard Unicode 5.2 normalization does not have such characters.) 16762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 16772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private boolean isMostDecompYesAndZeroCC(int norm16) { 16782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return norm16<minYesNo || norm16==MIN_NORMAL_MAYBE_YES || norm16==JAMO_VT; 16792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 16802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private boolean isDecompNoAlgorithmic(int norm16) { return norm16>=limitNoNo; } 16812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 16822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // For use with isCompYes(). 16832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Perhaps the compiler can combine the two tests for MIN_YES_YES_WITH_CC. 16842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // static uint8_t getCCFromYes(uint16_t norm16) { 16852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // return norm16>=MIN_YES_YES_WITH_CC ? 
(uint8_t)norm16 : 0; 16862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // } 16872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int getCCFromNoNo(int norm16) { 16882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if((extraData.charAt(norm16)&MAPPING_HAS_CCC_LCCC_WORD)!=0) { 16892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return extraData.charAt(norm16-1)&0xff; 16902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 16912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return 0; 16922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 16932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 16942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // requires that the [cpStart..cpLimit[ character passes isCompYesAndZeroCC() 16952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int getTrailCCFromCompYesAndZeroCC(CharSequence s, int cpStart, int cpLimit) { 16962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int c; 16972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(cpStart==(cpLimit-1)) { 16982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c=s.charAt(cpStart); 16992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 17002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c=Character.codePointAt(s, cpStart); 17012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 17022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int prevNorm16=getNorm16(c); 17032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(prevNorm16<=minYesNo) { 17042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return 0; // yesYes and Hangul LV/LVT have ccc=tccc=0 17052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 17062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return extraData.charAt(prevNorm16)>>8; // tccc from yesNo 17072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 17082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 17092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 
17102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Requires algorithmic-NoNo. 17112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int mapAlgorithmic(int c, int norm16) { 17122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return c+norm16-(minMaybeYes-MAX_DELTA-1); 17132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 17142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 17152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Requires minYesNo<norm16<limitNoNo. 17162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // private int getMapping(int norm16) { return /*extraData+*/norm16; } 17172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 17182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 17192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return index into maybeYesCompositions, or -1 17202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 17212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int getCompositionsListForDecompYes(int norm16) { 17222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(norm16==0 || MIN_NORMAL_MAYBE_YES<=norm16) { 17232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return -1; 17242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 17252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if((norm16-=minMaybeYes)<0) { 17262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // norm16<minMaybeYes: index into extraData which is a substring at 17272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // maybeYesCompositions[MIN_NORMAL_MAYBE_YES-minMaybeYes] 17282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // same as (MIN_NORMAL_MAYBE_YES-minMaybeYes)+norm16 17292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller norm16+=MIN_NORMAL_MAYBE_YES; // for yesYes; if Jamo L: harmless empty list 17302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 17312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return norm16; 17322ae130017183d2f66d55bf0ca51f8da3294644fdNeil 
Fuller } 17332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 17342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 17352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return index into maybeYesCompositions 17362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 17372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int getCompositionsListForComposite(int norm16) { 17382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // composite has both mapping & compositions list 17392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int firstUnit=extraData.charAt(norm16); 17402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return (MIN_NORMAL_MAYBE_YES-minMaybeYes)+norm16+ // mapping in maybeYesCompositions 17412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 1+ // +1 to skip the first unit with the mapping lenth 17422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller (firstUnit&MAPPING_LENGTH_MASK); // + mapping length 17432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 17442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 17452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param c code point must have compositions 17462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return index into maybeYesCompositions 17472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 17482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int getCompositionsList(int norm16) { 17492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return isDecompYes(norm16) ? 
17502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller getCompositionsListForDecompYes(norm16) : 17512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller getCompositionsListForComposite(norm16); 17522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 17532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 17542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Decompose a short piece of text which is likely to contain characters that 17552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // fail the quick check loop and/or where the quick check loop's overhead 17562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // is unlikely to be amortized. 17572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Called by the compose() and makeFCD() implementations. 17582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Public in Java for collation implementation code. 17592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public void decomposeShort(CharSequence s, int src, int limit, 17602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ReorderingBuffer buffer) { 17612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while(src<limit) { 17622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int c=Character.codePointAt(s, src); 17632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller src+=Character.charCount(c); 17642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller decompose(c, getNorm16(c), buffer); 17652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 17662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 17672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private void decompose(int c, int norm16, 17682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ReorderingBuffer buffer) { 17692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Only loops for 1:1 algorithmic mappings. 
17702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for(;;) { 17712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // get the decomposition and the lead and trail cc's 17722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(isDecompYes(norm16)) { 17732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // c does not decompose 17742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer.append(c, getCCFromYesOrMaybe(norm16)); 17752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(isHangul(norm16)) { 17762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Hangul syllable: decompose algorithmically 17772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller Hangul.decompose(c, buffer); 17782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(isDecompNoAlgorithmic(norm16)) { 17792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c=mapAlgorithmic(c, norm16); 17802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller norm16=getNorm16(c); 17812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller continue; 17822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 17832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // c decomposes, get everything from the variable-length extra data 17842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int firstUnit=extraData.charAt(norm16); 17852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int length=firstUnit&MAPPING_LENGTH_MASK; 17862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int leadCC, trailCC; 17872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller trailCC=firstUnit>>8; 17882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if((firstUnit&MAPPING_HAS_CCC_LCCC_WORD)!=0) { 17892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller leadCC=extraData.charAt(norm16-1)>>8; 17902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 17912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller leadCC=0; 17922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 
17932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ++norm16; // skip over the firstUnit 17942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer.append(extraData, norm16, norm16+length, leadCC, trailCC); 17952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 17962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return; 17972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 17982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 17992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 18002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 18012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Finds the recomposition result for 18022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * a forward-combining "lead" character, 18032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * specified with a pointer to its compositions list, 18042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * and a backward-combining "trail" character. 18052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 18062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <p>If the lead and trail characters combine, then this function returns 18072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * the following "compositeAndFwd" value: 18082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <pre> 18092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Bits 21..1 composite character 18102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Bit 0 set if the composite is a forward-combining starter 18112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * </pre> 18122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * otherwise it returns -1. 18132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 18142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <p>The compositions list has (trail, compositeAndFwd) pair entries, 18152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * encoded as either pairs or triples of 16-bit units. 
18162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * The last entry has the high bit of its first unit set. 18172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 18182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <p>The list is sorted by ascending trail characters (there are no duplicates). 18192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * A linear search is used. 18202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 18212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <p>See normalizer2impl.h for a more detailed description 18222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * of the compositions list format. 18232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 18242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static int combine(String compositions, int list, int trail) { 18252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int key1, firstUnit; 18262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(trail<COMP_1_TRAIL_LIMIT) { 18272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // trail character is 0..33FF 18282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // result entry may have 2 or 3 units 18292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller key1=(trail<<1); 18302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while(key1>(firstUnit=compositions.charAt(list))) { 18312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller list+=2+(firstUnit&COMP_1_TRIPLE); 18322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 18332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(key1==(firstUnit&COMP_1_TRAIL_MASK)) { 18342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if((firstUnit&COMP_1_TRIPLE)!=0) { 18352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return ((int)compositions.charAt(list+1)<<16)|compositions.charAt(list+2); 18362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 18372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return compositions.charAt(list+1); 
18382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 18392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 18402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 18412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // trail character is 3400..10FFFF 18422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // result entry has 3 units 18432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller key1=COMP_1_TRAIL_LIMIT+(((trail>>COMP_1_TRAIL_SHIFT))&~COMP_1_TRIPLE); 18442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int key2=(trail<<COMP_2_TRAIL_SHIFT)&0xffff; 18452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int secondUnit; 18462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for(;;) { 18472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(key1>(firstUnit=compositions.charAt(list))) { 18482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller list+=2+(firstUnit&COMP_1_TRIPLE); 18492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(key1==(firstUnit&COMP_1_TRAIL_MASK)) { 18502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(key2>(secondUnit=compositions.charAt(list+1))) { 18512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if((firstUnit&COMP_1_LAST_TUPLE)!=0) { 18522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 18532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 18542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller list+=3; 18552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 18562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(key2==(secondUnit&COMP_2_TRAIL_MASK)) { 18572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return ((secondUnit&~COMP_2_TRAIL_MASK)<<16)|compositions.charAt(list+2); 18582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 18592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 18602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 18612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 
18622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 18632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 18642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 18652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 18662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return -1; 18672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 18682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 18692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param list some character's compositions list 18702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param set recursively receives the composites from these compositions 18712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 18722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private void addComposites(int list, UnicodeSet set) { 18732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int firstUnit, compositeAndFwd; 18742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller do { 18752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller firstUnit=maybeYesCompositions.charAt(list); 18762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if((firstUnit&COMP_1_TRIPLE)==0) { 18772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller compositeAndFwd=maybeYesCompositions.charAt(list+1); 18782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller list+=2; 18792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 18802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller compositeAndFwd=(((int)maybeYesCompositions.charAt(list+1)&~COMP_2_TRAIL_MASK)<<16)| 18812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller maybeYesCompositions.charAt(list+2); 18822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller list+=3; 18832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 18842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int composite=compositeAndFwd>>1; 18852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if((compositeAndFwd&1)!=0) { 18862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 
addComposites(getCompositionsListForComposite(getNorm16(composite)), set); 18872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 18882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller set.add(composite); 18892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } while((firstUnit&COMP_1_LAST_TUPLE)==0); 18902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 18912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* 18922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Recomposes the buffer text starting at recomposeStartIndex 18932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * (which is in NFD - decomposed and canonically ordered), 18942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * and truncates the buffer contents. 18952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 18962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Note that recomposition never lengthens the text: 18972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Any character consists of either one or two code units; 18982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * a composition may contain at most one more code unit than the original starter, 18992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * while the combining mark that is removed has at least one code unit. 
19002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 19012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private void recompose(ReorderingBuffer buffer, int recomposeStartIndex, 19022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller boolean onlyContiguous) { 19032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller StringBuilder sb=buffer.getStringBuilder(); 19042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int p=recomposeStartIndex; 19052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(p==sb.length()) { 19062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return; 19072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 19082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 19092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int starter, pRemove; 19102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int compositionsList; 19112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int c, compositeAndFwd; 19122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int norm16; 19132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int cc, prevCC; 19142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller boolean starterIsSupplementary; 19152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 19162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Some of the following variables are not used until we have a forward-combining starter 19172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // and are only initialized now to avoid compiler warnings. 
19182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller compositionsList=-1; // used as indicator for whether we have a forward-combining starter 19192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller starter=-1; 19202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller starterIsSupplementary=false; 19212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller prevCC=0; 19222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 19232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for(;;) { 19242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c=sb.codePointAt(p); 19252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller p+=Character.charCount(c); 19262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller norm16=getNorm16(c); 19272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller cc=getCCFromYesOrMaybe(norm16); 19282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if( // this character combines backward and 19292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller isMaybe(norm16) && 19302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // we have seen a starter that combines forward and 19312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller compositionsList>=0 && 19322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // the backward-combining character is not blocked 19332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller (prevCC<cc || prevCC==0) 19342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ) { 19352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(isJamoVT(norm16)) { 19362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // c is a Jamo V/T, see if we can compose it with the previous character. 19372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(c<Hangul.JAMO_T_BASE) { 19382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // c is a Jamo Vowel, compose with previous Jamo L and following Jamo T. 
19392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller char prev=(char)(sb.charAt(starter)-Hangul.JAMO_L_BASE); 19402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(prev<Hangul.JAMO_L_COUNT) { 19412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller pRemove=p-1; 19422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller char syllable=(char) 19432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller (Hangul.HANGUL_BASE+ 19442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller (prev*Hangul.JAMO_V_COUNT+(c-Hangul.JAMO_V_BASE))* 19452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller Hangul.JAMO_T_COUNT); 19462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller char t; 19472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(p!=sb.length() && (t=(char)(sb.charAt(p)-Hangul.JAMO_T_BASE))<Hangul.JAMO_T_COUNT) { 19482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ++p; 19492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller syllable+=t; // The next character was a Jamo T. 19502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 19512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller sb.setCharAt(starter, syllable); 19522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // remove the Jamo V/T 19532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller sb.delete(pRemove, p); 19542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller p=pRemove; 19552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 19562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 19572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* 19582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * No "else" for Jamo T: 19592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Since the input is in NFD, there are no Hangul LV syllables that 19602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * a Jamo T could combine with. 19612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * All Jamo Ts are combined above when handling Jamo Vs. 
19622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 19632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(p==sb.length()) { 19642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 19652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 19662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller compositionsList=-1; 19672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller continue; 19682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if((compositeAndFwd=combine(maybeYesCompositions, compositionsList, c))>=0) { 19692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // The starter and the combining mark (c) do combine. 19702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int composite=compositeAndFwd>>1; 19712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 19722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Remove the combining mark. 19732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller pRemove=p-Character.charCount(c); // pRemove & p: start & limit of the combining mark 19742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller sb.delete(pRemove, p); 19752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller p=pRemove; 19762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Replace the starter with the composite. 
19772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(starterIsSupplementary) { 19782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(composite>0xffff) { 19792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // both are supplementary 19802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller sb.setCharAt(starter, UTF16.getLeadSurrogate(composite)); 19812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller sb.setCharAt(starter+1, UTF16.getTrailSurrogate(composite)); 19822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 19832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller sb.setCharAt(starter, (char)c); 19842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller sb.deleteCharAt(starter+1); 19852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // The composite is shorter than the starter, 19862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // move the intermediate characters forward one. 19872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller starterIsSupplementary=false; 19882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller --p; 19892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 19902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(composite>0xffff) { 19912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // The composite is longer than the starter, 19922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // move the intermediate characters back one. 
19932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller starterIsSupplementary=true; 19942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller sb.setCharAt(starter, UTF16.getLeadSurrogate(composite)); 19952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller sb.insert(starter+1, UTF16.getTrailSurrogate(composite)); 19962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ++p; 19972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 19982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // both are on the BMP 19992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller sb.setCharAt(starter, (char)composite); 20002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 20012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 20022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Keep prevCC because we removed the combining mark. 20032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 20042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(p==sb.length()) { 20052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 20062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 20072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Is the composite a starter that combines forward? 20082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if((compositeAndFwd&1)!=0) { 20092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller compositionsList= 20102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller getCompositionsListForComposite(getNorm16(composite)); 20112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 20122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller compositionsList=-1; 20132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 20142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 20152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // We combined; continue with looking for compositions. 
20162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller continue; 20172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 20182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 20192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 20202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // no combination this time 20212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller prevCC=cc; 20222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(p==sb.length()) { 20232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 20242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 20252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 20262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // If c did not combine, then check if it is a starter. 20272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(cc==0) { 20282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Found a new starter. 20292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if((compositionsList=getCompositionsListForDecompYes(norm16))>=0) { 20302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // It may combine with something, prepare for it. 20312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(c<=0xffff) { 20322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller starterIsSupplementary=false; 20332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller starter=p-1; 20342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 20352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller starterIsSupplementary=true; 20362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller starter=p-2; 20372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 20382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 20392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(onlyContiguous) { 20402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // FCC: no discontiguous compositions; any intervening character blocks. 
20412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller compositionsList=-1; 20422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 20432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 20442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer.flush(); 20452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 20462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 20472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public int composePair(int a, int b) { 20482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int norm16=getNorm16(a); // maps an out-of-range 'a' to inert norm16=0 20492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int list; 20502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(isInert(norm16)) { 20512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return -1; 20522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(norm16<minYesNoMappingsOnly) { 20532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(isJamoL(norm16)) { 20542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller b-=Hangul.JAMO_V_BASE; 20552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(0<=b && b<Hangul.JAMO_V_COUNT) { 20562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return 20572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller (Hangul.HANGUL_BASE+ 20582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ((a-Hangul.JAMO_L_BASE)*Hangul.JAMO_V_COUNT+b)* 20592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller Hangul.JAMO_T_COUNT); 20602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 20612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return -1; 20622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 20632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(isHangul(norm16)) { 20642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller b-=Hangul.JAMO_T_BASE; 20652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(Hangul.isHangulWithoutJamoT((char)a) && 0<b && b<Hangul.JAMO_T_COUNT) { // not b==0! 
20662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return a+b; 20672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 20682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return -1; 20692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 20702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 20712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 'a' has a compositions list in extraData 20722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller list=norm16; 20732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(norm16>minYesNo) { // composite 'a' has both mapping & compositions list 20742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller list+= // mapping pointer 20752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 1+ // +1 to skip the first unit with the mapping lenth 20762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller (extraData.charAt(list)&MAPPING_LENGTH_MASK); // + mapping length 20772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 20782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Turn the offset-into-extraData into an offset-into-maybeYesCompositions. 
20792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller list+=MIN_NORMAL_MAYBE_YES-minMaybeYes; 20802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 20812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(norm16<minMaybeYes || MIN_NORMAL_MAYBE_YES<=norm16) { 20822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return -1; 20832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 20842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller list=norm16-minMaybeYes; // offset into maybeYesCompositions 20852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 20862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(b<0 || 0x10ffff<b) { // combine(list, b) requires a valid code point b 20872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return -1; 20882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 20892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return combine(maybeYesCompositions, list, b)>>1; 20902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 20912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 20922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 20932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Does c have a composition boundary before it? 20942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * True if its decomposition begins with a character that has 20952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * ccc=0 && NFC_QC=Yes (isCompYesAndZeroCC()). 20962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * As a shortcut, this is true if c itself has ccc=0 && NFC_QC=Yes 20972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * (isCompYesAndZeroCC()) so we need not decompose. 
20982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 20992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private boolean hasCompBoundaryBefore(int c, int norm16) { 21002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for(;;) { 21012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(isCompYesAndZeroCC(norm16)) { 21022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return true; 21032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(isMaybeOrNonZeroCC(norm16)) { 21042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return false; 21052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(isDecompNoAlgorithmic(norm16)) { 21062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c=mapAlgorithmic(c, norm16); 21072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller norm16=getNorm16(c); 21082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 21092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // c decomposes, get everything from the variable-length extra data 21102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int firstUnit=extraData.charAt(norm16); 21112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if((firstUnit&MAPPING_LENGTH_MASK)==0) { 21122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return false; 21132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 21142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if((firstUnit&MAPPING_HAS_CCC_LCCC_WORD)!=0 && (extraData.charAt(norm16-1)&0xff00)!=0) { 21152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return false; // non-zero leadCC 21162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 21172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return isCompYesAndZeroCC(getNorm16(Character.codePointAt(extraData, norm16+1))); 21182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 21192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 21202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 21212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 
private int findPreviousCompBoundary(CharSequence s, int p) { 21222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while(p>0) { 21232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int c=Character.codePointBefore(s, p); 21242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller p-=Character.charCount(c); 21252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(hasCompBoundaryBefore(c)) { 21262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 21272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 21282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // We could also test hasCompBoundaryAfter() and return iter.codePointLimit, 21292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // but that's probably not worth the extra cost. 21302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 21312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return p; 21322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 21332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int findNextCompBoundary(CharSequence s, int p, int limit) { 21342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while(p<limit) { 21352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int c=Character.codePointAt(s, p); 21362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int norm16=normTrie.get(c); 21372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(hasCompBoundaryBefore(c, norm16)) { 21382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 21392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 21402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller p+=Character.charCount(c); 21412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 21422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return p; 21432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 21442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 21452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int findPreviousFCDBoundary(CharSequence s, int p) { 
21462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while(p>0) { 21472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int c=Character.codePointBefore(s, p); 21482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller p-=Character.charCount(c); 21492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(c<MIN_CCC_LCCC_CP || getFCD16(c)<=0xff) { 21502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 21512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 21522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 21532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return p; 21542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 21552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int findNextFCDBoundary(CharSequence s, int p, int limit) { 21562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while(p<limit) { 21572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int c=Character.codePointAt(s, p); 21582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(c<MIN_CCC_LCCC_CP || getFCD16(c)<=0xff) { 21592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 21602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 21612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller p+=Character.charCount(c); 21622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 21632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return p; 21642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 21652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 21662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private void addToStartSet(Trie2Writable newData, int origin, int decompLead) { 21672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int canonValue=newData.get(decompLead); 21682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if((canonValue&(CANON_HAS_SET|CANON_VALUE_MASK))==0 && origin!=0) { 21692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // origin is the first character whose decomposition starts with 
21702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // the character for which we are setting the value. 21712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller newData.set(decompLead, canonValue|origin); 21722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 21732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // origin is not the first character, or it is U+0000. 21742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller UnicodeSet set; 21752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if((canonValue&CANON_HAS_SET)==0) { 21762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int firstOrigin=canonValue&CANON_VALUE_MASK; 21772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller canonValue=(canonValue&~CANON_VALUE_MASK)|CANON_HAS_SET|canonStartSets.size(); 21782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller newData.set(decompLead, canonValue); 21792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller canonStartSets.add(set=new UnicodeSet()); 21802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(firstOrigin!=0) { 21812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller set.add(firstOrigin); 21822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 21832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 21842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller set=canonStartSets.get(canonValue&CANON_VALUE_MASK); 21852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 21862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller set.add(origin); 21872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 21882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 21892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 21902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller @SuppressWarnings("unused") 21912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private VersionInfo dataVersion; 21922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 21932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Code point thresholds for quick check codes. 
21942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int minDecompNoCP; 21952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int minCompNoMaybeCP; 21962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 21972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Norm16 value thresholds for quick check combinations and types of extra data. 21982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int minYesNo; 21992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int minYesNoMappingsOnly; 22002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int minNoNo; 22012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int limitNoNo; 22022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int minMaybeYes; 22032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 22042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private Trie2_16 normTrie; 22052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private String maybeYesCompositions; 22062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private String extraData; // mappings and/or compositions for yesYes, yesNo & noNo characters 22072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private byte[] smallFCD; // [0x100] one bit per 32 BMP code points, set if any FCD!=0 22082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int[] tccc180; // [0x180] tccc values for U+0000..U+017F 22092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 22102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private Trie2_32 canonIterData; 22112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private ArrayList<UnicodeSet> canonStartSets; 22122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 22132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // bits in canonIterData 22142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static final int CANON_NOT_SEGMENT_STARTER = 0x80000000; 22152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static final int 
CANON_HAS_COMPOSITIONS = 0x40000000; 22162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static final int CANON_HAS_SET = 0x200000; 22172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static final int CANON_VALUE_MASK = 0x1fffff; 22182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller} 2219