12ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller/* GENERATED SOURCE. DO NOT MODIFY. */ 2f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert// © 2016 and later: Unicode, Inc. and others. 3f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert// License & terms of use: http://www.unicode.org/copyright.html#License 42ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller/* 52ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ******************************************************************************* 62ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Copyright (C) 2009-2015, International Business Machines 72ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Corporation and others. All Rights Reserved. 82ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ******************************************************************************* 92ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerpackage android.icu.impl; 122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport java.io.IOException; 142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport java.nio.ByteBuffer; 152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport java.util.ArrayList; 162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport java.util.Iterator; 172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.text.UTF16; 192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.text.UnicodeSet; 202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.util.ICUUncheckedIOException; 212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.util.VersionInfo; 222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 231537b2f39245c07b00aa78c3600f7aebcb172490Neil Fuller/** 
2405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * Low-level implementation of the Unicode Normalization Algorithm. 2505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * For the data structure and details see the documentation at the end of 2605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * C++ normalizer2impl.h and in the design doc at 2705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * http://site.icu-project.org/design/normalization/custom 281537b2f39245c07b00aa78c3600f7aebcb172490Neil Fuller * @hide Only a subset of ICU is exposed in Android 29836e6b40a94ec3fb7545a76cb072960442b7eee9Neil Fuller */ 302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerpublic final class Normalizer2Impl { 312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static final class Hangul { 322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* Korean Hangul and Jamo constants */ 332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static final int JAMO_L_BASE=0x1100; /* "lead" jamo */ 342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static final int JAMO_L_END=0x1112; 352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static final int JAMO_V_BASE=0x1161; /* "vowel" jamo */ 362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static final int JAMO_V_END=0x1175; 372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static final int JAMO_T_BASE=0x11a7; /* "trail" jamo */ 382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static final int JAMO_T_END=0x11c2; 392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static final int HANGUL_BASE=0xac00; 412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static final int HANGUL_END=0xd7a3; 422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static final int JAMO_L_COUNT=19; 442ae130017183d2f66d55bf0ca51f8da3294644fdNeil 
Fuller public static final int JAMO_V_COUNT=21; 452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static final int JAMO_T_COUNT=28; 462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static final int JAMO_L_LIMIT=JAMO_L_BASE+JAMO_L_COUNT; 482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static final int JAMO_V_LIMIT=JAMO_V_BASE+JAMO_V_COUNT; 492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static final int JAMO_VT_COUNT=JAMO_V_COUNT*JAMO_T_COUNT; 512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static final int HANGUL_COUNT=JAMO_L_COUNT*JAMO_V_COUNT*JAMO_T_COUNT; 532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static final int HANGUL_LIMIT=HANGUL_BASE+HANGUL_COUNT; 542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static boolean isHangul(int c) { 562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return HANGUL_BASE<=c && c<HANGUL_LIMIT; 572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert public static boolean isHangulLV(int c) { 592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c-=HANGUL_BASE; 6005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return 0<=c && c<HANGUL_COUNT && c%JAMO_T_COUNT==0; 612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static boolean isJamoL(int c) { 632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return JAMO_L_BASE<=c && c<JAMO_L_LIMIT; 642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static boolean isJamoV(int c) { 662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return JAMO_V_BASE<=c && c<JAMO_V_LIMIT; 
672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert public static boolean isJamoT(int c) { 6905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert int t=c-JAMO_T_BASE; 7005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return 0<t && t<JAMO_T_COUNT; // not JAMO_T_BASE itself 7105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 7205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert public static boolean isJamo(int c) { 7305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return JAMO_L_BASE<=c && c<=JAMO_T_END && 7405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert (c<=JAMO_L_END || (JAMO_V_BASE<=c && c<=JAMO_V_END) || JAMO_T_BASE<c); 7505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Decomposes c, which must be a Hangul syllable, into buffer 792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * and returns the length of the decomposition (2 or 3). 
802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static int decompose(int c, Appendable buffer) { 822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller try { 832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c-=HANGUL_BASE; 842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int c2=c%JAMO_T_COUNT; 852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c/=JAMO_T_COUNT; 862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer.append((char)(JAMO_L_BASE+c/JAMO_V_COUNT)); 872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer.append((char)(JAMO_V_BASE+c%JAMO_V_COUNT)); 882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(c2==0) { 892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return 2; 902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer.append((char)(JAMO_T_BASE+c2)); 922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return 3; 932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } catch(IOException e) { 952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Will not occur because we do not write to I/O. 962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new ICUUncheckedIOException(e); 972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 1002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 1012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Decomposes c, which must be a Hangul syllable, into buffer. 1022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * This is the raw, not recursive, decomposition. Its length is always 2. 
1032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 1042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static void getRawDecomposition(int c, Appendable buffer) { 1052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller try { 1062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int orig=c; 1072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c-=HANGUL_BASE; 1082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int c2=c%JAMO_T_COUNT; 1092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(c2==0) { 1102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c/=JAMO_T_COUNT; 1112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer.append((char)(JAMO_L_BASE+c/JAMO_V_COUNT)); 1122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer.append((char)(JAMO_V_BASE+c%JAMO_V_COUNT)); 1132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 1142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer.append((char)(orig-c2)); // LV syllable 1152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer.append((char)(JAMO_T_BASE+c2)); 1162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } catch(IOException e) { 1182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Will not occur because we do not write to I/O. 1192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new ICUUncheckedIOException(e); 1202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 1242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 1252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Writable buffer that takes care of canonical ordering. 1262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Its Appendable methods behave like the C++ implementation's 1272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * appendZeroCC() methods. 
1282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <p> 1292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * If dest is a StringBuilder, then the buffer writes directly to it. 1302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Otherwise, the buffer maintains a StringBuilder for intermediate text segments 1312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * until no further changes are necessary and whole segments are appended. 1322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * append() methods that take combining-class values always write to the StringBuilder. 1332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Other append() methods flush and append to the Appendable. 1342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 1352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static final class ReorderingBuffer implements Appendable { 1362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public ReorderingBuffer(Normalizer2Impl ni, Appendable dest, int destCapacity) { 1372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller impl=ni; 1382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller app=dest; 1392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(app instanceof StringBuilder) { 1402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller appIsStringBuilder=true; 1412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller str=(StringBuilder)dest; 1422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // In Java, the constructor subsumes public void init(int destCapacity) { 1432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller str.ensureCapacity(destCapacity); 1442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller reorderStart=0; 1452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(str.length()==0) { 1462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller lastCC=0; 1472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 1482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller setIterator(); 
1492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller lastCC=previousCC(); 1502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Set reorderStart after the last code point with cc<=1 if there is one. 1512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(lastCC>1) { 1522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while(previousCC()>1) {} 1532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller reorderStart=codePointLimit; 1552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 1572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller appIsStringBuilder=false; 1582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller str=new StringBuilder(); 1592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller reorderStart=0; 1602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller lastCC=0; 1612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 1642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public boolean isEmpty() { return str.length()==0; } 1652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public int length() { return str.length(); } 1662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public int getLastCC() { return lastCC; } 1672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 1682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public StringBuilder getStringBuilder() { return str; } 1692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 1702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public boolean equals(CharSequence s, int start, int limit) { 1712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return UTF16Plus.equal(str, 0, str.length(), s, start, limit); 1722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 
1742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public void append(int c, int cc) { 1752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(lastCC<=cc || cc==0) { 1762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller str.appendCodePoint(c); 1772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller lastCC=cc; 1782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(cc<=1) { 1792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller reorderStart=str.length(); 1802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 1822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller insert(c, cc); 1832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // s must be in NFD, otherwise change the implementation. 1862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public void append(CharSequence s, int start, int limit, 1872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int leadCC, int trailCC) { 1882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(start==limit) { 1892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return; 1902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(lastCC<=leadCC || leadCC==0) { 1922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(trailCC<=1) { 1932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller reorderStart=str.length()+(limit-start); 1942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(leadCC<=1) { 1952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller reorderStart=str.length()+1; // Ok if not a code point boundary. 
1962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller str.append(s, start, limit); 1982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller lastCC=trailCC; 1992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 2002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int c=Character.codePointAt(s, start); 2012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller start+=Character.charCount(c); 2022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller insert(c, leadCC); // insert first code point 2032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while(start<limit) { 2042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c=Character.codePointAt(s, start); 2052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller start+=Character.charCount(c); 2062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(start<limit) { 2072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // s must be in NFD, otherwise we need to use getCC(). 2082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller leadCC=getCCFromYesOrMaybe(impl.getNorm16(c)); 2092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 2102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller leadCC=trailCC; 2112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller append(c, leadCC); 2132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // The following append() methods work like C++ appendZeroCC(). 2172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // They assume that the cc or trailCC of their input is 0. 2182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Most of them implement Appendable interface methods. 
219f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert @Override 2202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public ReorderingBuffer append(char c) { 2212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller str.append(c); 2222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller lastCC=0; 2232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller reorderStart=str.length(); 2242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return this; 2252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public void appendZeroCC(int c) { 2272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller str.appendCodePoint(c); 2282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller lastCC=0; 2292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller reorderStart=str.length(); 2302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 231f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert @Override 2322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public ReorderingBuffer append(CharSequence s) { 2332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(s.length()!=0) { 2342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller str.append(s); 2352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller lastCC=0; 2362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller reorderStart=str.length(); 2372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return this; 2392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 240f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert @Override 2412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public ReorderingBuffer append(CharSequence s, int start, int limit) { 2422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(start!=limit) { 2432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller str.append(s, start, limit); 2442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller lastCC=0; 2452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 
reorderStart=str.length(); 2462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return this; 2482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 2502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Flushes from the intermediate StringBuilder to the Appendable, 2512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * if they are different objects. 2522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Used after recomposition. 2532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Must be called at the end when writing to a non-StringBuilder Appendable. 2542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 2552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public void flush() { 2562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(appIsStringBuilder) { 2572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller reorderStart=str.length(); 2582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 2592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller try { 2602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller app.append(str); 2612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller str.setLength(0); 2622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller reorderStart=0; 2632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } catch(IOException e) { 2642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new ICUUncheckedIOException(e); // Avoid declaring "throws IOException". 
2652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller lastCC=0; 2682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 2702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Flushes from the intermediate StringBuilder to the Appendable, 2712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * if they are different objects. 2722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Then appends the new text to the Appendable or StringBuilder. 2732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Normally used after quick check loops find a non-empty sequence. 2742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 2752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public ReorderingBuffer flushAndAppendZeroCC(CharSequence s, int start, int limit) { 2762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(appIsStringBuilder) { 2772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller str.append(s, start, limit); 2782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller reorderStart=str.length(); 2792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 2802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller try { 2812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller app.append(str).append(s, start, limit); 2822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller str.setLength(0); 2832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller reorderStart=0; 2842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } catch(IOException e) { 2852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new ICUUncheckedIOException(e); // Avoid declaring "throws IOException". 
2862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller lastCC=0; 2892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return this; 2902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public void remove() { 2922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller str.setLength(0); 2932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller lastCC=0; 2942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller reorderStart=0; 2952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public void removeSuffix(int suffixLength) { 2972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int oldLength=str.length(); 2982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller str.delete(oldLength-suffixLength, oldLength); 2992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller lastCC=0; 3002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller reorderStart=str.length(); 3012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* 3042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * TODO: Revisit whether it makes sense to track reorderStart. 3052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * It is set to after the last known character with cc<=1, 3062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * which stops previousCC() before it reads that character and looks up its cc. 3072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * previousCC() is normally only called from insert(). 3082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * In other words, reorderStart speeds up the insertion of a combining mark 3092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * into a multi-combining mark sequence where it does not belong at the end. 
3102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * This might not be worth the trouble. 3112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * On the other hand, it's not a huge amount of trouble. 3122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 3132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * We probably need it for UNORM_SIMPLE_APPEND. 3142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 3152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Inserts c somewhere before the last character. 3172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Requires 0<cc<lastCC which implies reorderStart<limit. 3182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private void insert(int c, int cc) { 3192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for(setIterator(), skipPrevious(); previousCC()>cc;) {} 3202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // insert c at codePointLimit, after the character with prevCC<=cc 3212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(c<=0xffff) { 3222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller str.insert(codePointLimit, (char)c); 3232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(cc<=1) { 3242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller reorderStart=codePointLimit+1; 3252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 3272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller str.insert(codePointLimit, Character.toChars(c)); 3282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(cc<=1) { 3292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller reorderStart=codePointLimit+2; 3302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 
 private final Normalizer2Impl impl;  // supplies combining classes to previousCC()
    // NOTE(review): app/appIsStringBuilder are assigned and used outside this span;
    // presumably the output destination and whether it is the same object as str - confirm.
    private final Appendable app;
    private final StringBuilder str;  // the text that the backward iterator below walks
    private final boolean appIsStringBuilder;
    private int reorderStart;  // previousCC() does not read characters before this index
    private int lastCC;

    // private backward iterator
    // [codePointStart, codePointLimit) is the code point most recently stepped over.

    // Positions the iterator at the end of str.
    private void setIterator() { codePointStart=str.length(); }
    // Steps back over one code point without looking up its combining class.
    private void skipPrevious() {  // Requires 0<codePointStart.
        codePointLimit=codePointStart;
        codePointStart=str.offsetByCodePoints(codePointStart, -1);
    }
    // Steps back over one code point and returns its combining class.
    // At or before reorderStart it returns 0 and leaves codePointStart where it was.
    private int previousCC() {  // Returns 0 if there is no previous character.
        codePointLimit=codePointStart;
        if(reorderStart>=codePointStart) {
            return 0;
        }
        int c=str.codePointBefore(codePointStart);
        codePointStart-=Character.charCount(c);
        return impl.getCCFromYesOrMaybeCP(c);
    }

    private int codePointStart, codePointLimit;
}

// TODO: Propose as public API on the UTF16 class.
// TODO: Propose widening UTF16 methods that take char to take int.
// TODO: Propose widening UTF16 methods that take String to take CharSequence.
public static final class UTF16Plus {
    /**
     * Assuming c is a surrogate code point (UTF16.isSurrogate(c)),
     * is it a lead surrogate?
     * Only bit 0x400 is tested, so the result is meaningless for non-surrogates.
     * @param c code unit or code point
     * @return true or false
     */
    public static boolean isSurrogateLead(int c) { return (c&0x400)==0; }
    /**
     * Compares two CharSequence objects for binary equality.
     * Compares char by char; returns true immediately for the same object.
     * @param s1 first sequence
     * @param s2 second sequence
     * @return true if s1 contains the same text as s2
     */
    public static boolean equal(CharSequence s1, CharSequence s2) {
        if(s1==s2) {
            return true;
        }
        int length=s1.length();
        if(length!=s2.length()) {
            return false;
        }
        for(int i=0; i<length; ++i) {
            if(s1.charAt(i)!=s2.charAt(i)) {
                return false;
            }
        }
        return true;
    }
    /**
     * Compares two CharSequence subsequences for binary equality.
     * The offsets are not validated here.
     * @param s1 first sequence
     * @param start1 start offset in first sequence
     * @param limit1 limit offset in first sequence
     * @param s2 second sequence
     * @param start2 start offset in second sequence
     * @param limit2 limit offset in second sequence
     * @return true if s1.subSequence(start1, limit1) contains the same text
     *    as s2.subSequence(start2, limit2)
     */
    public static boolean equal(CharSequence s1, int start1, int limit1,
                                CharSequence s2, int start2, int limit2) {
        // Different lengths can never match.
        if((limit1-start1)!=(limit2-start2)) {
            return false;
        }
        // Same object, same start, same length: identical without reading any char.
        if(s1==s2 && start1==start2) {
            return true;
        }
        while(start1<limit1) {
            if(s1.charAt(start1++)!=s2.charAt(start2++)) {
                return false;
            }
        }
        return true;
    }
}

/** Creates an instance with no data; the load() methods fill in the fields. */
public Normalizer2Impl() {}

/** Accepts only data whose first version byte is 3. */
private static final class IsAcceptable implements ICUBinary.Authenticate {
    @Override
    public boolean isDataVersionAcceptable(byte version[]) {
        return version[0]==3;
    }
}
private static final IsAcceptable IS_ACCEPTABLE = new IsAcceptable();
private static final int DATA_FORMAT = 0x4e726d32;  // "Nrm2"

/**
 * Reads normalization data from bytes into this instance.
 * The buffer is consumed strictly in order: header, indexes, normTrie
 * (plus padding), composition/mapping string, then the 0x100-byte smallFCD table.
 * @param bytes the serialized data; read sequentially from its current position
 * @return this
 * @throws ICUUncheckedIOException if there are too few indexes, if the trie is longer than
 *     the space the indexes reserve for it, or if an IOException occurs while reading
 */
public Normalizer2Impl load(ByteBuffer bytes) {
    try {
        dataVersion=ICUBinary.readHeaderAndDataVersion(bytes, DATA_FORMAT, IS_ACCEPTABLE);
        // The first int is the byte length of the indexes array itself.
        int indexesLength=bytes.getInt()/4;  // inIndexes[IX_NORM_TRIE_OFFSET]/4
        if(indexesLength<=IX_MIN_LCCC_CP) {
            throw new ICUUncheckedIOException("Normalizer2 data: not enough indexes");
        }
        int[] inIndexes=new int[indexesLength];
        // Index 0 was already consumed above; restore its value and read the rest.
        inIndexes[0]=indexesLength*4;
        for(int i=1; i<indexesLength; ++i) {
            inIndexes[i]=bytes.getInt();
        }

        // Code point thresholds.
        minDecompNoCP=inIndexes[IX_MIN_DECOMP_NO_CP];
        minCompNoMaybeCP=inIndexes[IX_MIN_COMP_NO_MAYBE_CP];
        minLcccCP=inIndexes[IX_MIN_LCCC_CP];

        // norm16 value thresholds.
        minYesNo=inIndexes[IX_MIN_YES_NO];
        minYesNoMappingsOnly=inIndexes[IX_MIN_YES_NO_MAPPINGS_ONLY];
        minNoNo=inIndexes[IX_MIN_NO_NO];
        minNoNoCompBoundaryBefore=inIndexes[IX_MIN_NO_NO_COMP_BOUNDARY_BEFORE];
        minNoNoCompNoMaybeCC=inIndexes[IX_MIN_NO_NO_COMP_NO_MAYBE_CC];
        minNoNoEmpty=inIndexes[IX_MIN_NO_NO_EMPTY];
        limitNoNo=inIndexes[IX_LIMIT_NO_NO];
        minMaybeYes=inIndexes[IX_MIN_MAYBE_YES];
        assert((minMaybeYes&7)==0);  // 8-aligned for noNoDelta bit fields
        centerNoNoDelta=(minMaybeYes>>DELTA_SHIFT)-MAX_DELTA-1;

        // Read the normTrie.
        int offset=inIndexes[IX_NORM_TRIE_OFFSET];
        int nextOffset=inIndexes[IX_EXTRA_DATA_OFFSET];
        normTrie=Trie2_16.createFromSerialized(bytes);
        int trieLength=normTrie.getSerializedLength();
        if(trieLength>(nextOffset-offset)) {
            throw new ICUUncheckedIOException("Normalizer2 data: not enough bytes for normTrie");
        }
        ICUBinary.skipBytes(bytes, (nextOffset-offset)-trieLength);  // skip padding after trie bytes

        // Read the composition and mapping data.
        offset=nextOffset;
        nextOffset=inIndexes[IX_SMALL_FCD_OFFSET];
        int numChars=(nextOffset-offset)/2;
        // When numChars is 0 this method does not assign maybeYesCompositions or extraData.
        if(numChars!=0) {
            maybeYesCompositions=ICUBinary.getString(bytes, numChars, 0);
            // extraData is the tail of the same string, starting past the maybeYes part.
            extraData=maybeYesCompositions.substring((MIN_NORMAL_MAYBE_YES-minMaybeYes)>>OFFSET_SHIFT);
        }

        // smallFCD: new in formatVersion 2
        offset=nextOffset;  // not read again in this method
        smallFCD=new byte[0x100];
        bytes.get(smallFCD);

        return this;
    } catch(IOException e) {
        throw new ICUUncheckedIOException(e);
    }
}
/**
 * Loads the data obtained from ICUBinary.getRequiredData(name).
 * @param name the data name passed to ICUBinary.getRequiredData
 * @return this
 */
public Normalizer2Impl load(String name) {
    return load(ICUBinary.getRequiredData(name));
}

// Adds the whole range [start, end] to set when norm16 is above MIN_NORMAL_MAYBE_YES
// (and is not JAMO_VT), or when norm16 lies in [minNoNoCompNoMaybeCC, limitNoNo) and the
// FCD16 of start has a non-zero upper byte. Only start's FCD16 is looked at for the range.
private void enumLcccRange(int start, int end, int norm16, UnicodeSet set) {
    if (norm16 > MIN_NORMAL_MAYBE_YES && norm16 != JAMO_VT) {
        set.add(start, end);
    } else if (minNoNoCompNoMaybeCC <= norm16 && norm16 < limitNoNo) {
        int fcd16=getFCD16(start);
        if(fcd16>0xff) { set.add(start, end); }
    }
}

// Adds start to set; for a multi-character algorithmic-noNo range whose delta-tccc bits
// exceed DELTA_TCCC_1, also adds every code point where the FCD16 value changes.
private void enumNorm16PropertyStartsRange(int start, int end, int value, UnicodeSet set) {
    /* add the start code point to the USet */
    set.add(start);
    if(start!=end && isAlgorithmicNoNo(value) && (value & DELTA_TCCC_MASK) > DELTA_TCCC_1) {
        // Range of code points with same-norm16-value algorithmic decompositions.
        // They might have different non-zero FCD16 values.
        int prevFCD16=getFCD16(start);
        while(++start<=end) {
            int fcd16=getFCD16(start);
            if(fcd16!=prevFCD16) {
                set.add(start);
                prevFCD16=fcd16;
            }
        }
    }
}

/**
 * Runs enumLcccRange over the ranges of normTrie, adding the selected ranges to set.
 * Iteration stops at the first range flagged leadSurrogate.
 * @param set the set to add to
 */
public void addLcccChars(UnicodeSet set) {
    Iterator<Trie2.Range> trieIterator=normTrie.iterator();
    Trie2.Range range;
    while(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) {
        enumLcccRange(range.startCodePoint, range.endCodePoint, range.value, set);
    }
}

/**
 * Adds to set the start of each normTrie range (via enumNorm16PropertyStartsRange)
 * plus the Hangul boundaries listed below.
 * Trie iteration stops at the first range flagged leadSurrogate.
 * @param set the set to add to
 */
public void addPropertyStarts(UnicodeSet set) {
    /* add the start code point of each same-value range of each trie */
    Iterator<Trie2.Range> trieIterator=normTrie.iterator();
    Trie2.Range range;
    while(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) {
        enumNorm16PropertyStartsRange(range.startCodePoint, range.endCodePoint, range.value, set);
    }

    /* add Hangul LV syllables and LV+1 because of skippables */
    for(int c=Hangul.HANGUL_BASE; c<Hangul.HANGUL_LIMIT; c+=Hangul.JAMO_T_COUNT) {
        set.add(c);
        set.add(c+1);
    }
    set.add(Hangul.HANGUL_LIMIT); /* add Hangul+1 to continue with other properties */
}

/**
 * Adds to set the start of each range of canonIterData as seen through
 * segmentStarterMapper. Calls ensureCanonIterData() first.
 * Iteration stops at the first range flagged leadSurrogate.
 * @param set the set to add to
 */
public void addCanonIterPropertyStarts(UnicodeSet set) {
    /* add the start code point of each same-value range of the canonical iterator data trie */
    ensureCanonIterData();
    // currently only used for the SEGMENT_STARTER property
    Iterator<Trie2.Range> trieIterator=canonIterData.iterator(segmentStarterMapper);
    Trie2.Range range;
    while(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) {
        /* add the start code point to the USet */
        set.add(range.startCodePoint);
    }
}
// Reduces a canonIterData value to its CANON_NOT_SEGMENT_STARTER bit, so that ranges
// are split only where that bit changes.
private static final Trie2.ValueMapper segmentStarterMapper=new Trie2.ValueMapper() {
    @Override
    public int map(int in) {
        return in&CANON_NOT_SEGMENT_STARTER;
    }
};

// low-level properties ------------------------------------------------ ***

// Note: Normalizer2Impl.java r30983 (2011-nov-27)
// still had getFCDTrie() which built and cached an FCD trie.
// That provided faster access to FCD data than getFCD16FromNormData()
// but required synchronization and consumed some 10kB of heap memory
// in any process that uses FCD (e.g., via collation).
// minDecompNoCP etc. and smallFCD[] are intended to help with any loss of performance,
// at least for ASCII & CJK.
/**
 * Builds the canonical-iterator data for this instance.
 * This is required before any of {@link #isCanonSegmentStarter(int)} or
 * {@link #getCanonStartSet(int, UnicodeSet)} are called,
 * or else they crash.
 * The work is done only while canonIterData is still null; later calls return at once.
 * @return this
 */
public synchronized Normalizer2Impl ensureCanonIterData() {
    if(canonIterData==null) {
        // Build into a writable trie; it is converted and published at the very end.
        Trie2Writable newData=new Trie2Writable(0, 0);
        canonStartSets=new ArrayList<UnicodeSet>();
        Iterator<Trie2.Range> trieIterator=normTrie.iterator();
        Trie2.Range range;
        // Stop at the first range flagged leadSurrogate.
        while(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) {
            final int norm16=range.value;
            if(isInert(norm16) || (minYesNo<=norm16 && norm16<minNoNo)) {
                // Inert, or 2-way mapping (including Hangul syllable).
                // We do not write a canonStartSet for any yesNo character.
                // Composites from 2-way mappings are added at runtime from the
                // starter's compositions list, and the other characters in
                // 2-way mappings get CANON_NOT_SEGMENT_STARTER set because they are
                // "maybe" characters.
                continue;
            }
            for(int c=range.startCodePoint; c<=range.endCodePoint; ++c) {
                // newData may already hold bits for c, set while handling an earlier mapping.
                final int oldValue=newData.get(c);
                int newValue=oldValue;
                if(isMaybeOrNonZeroCC(norm16)) {
                    // not a segment starter if it occurs in a decomposition or has cc!=0
                    newValue|=CANON_NOT_SEGMENT_STARTER;
                    if(norm16<MIN_NORMAL_MAYBE_YES) {
                        newValue|=CANON_HAS_COMPOSITIONS;
                    }
                } else if(norm16<minYesNo) {
                    newValue|=CANON_HAS_COMPOSITIONS;
                } else {
                    // c has a one-way decomposition
                    int c2=c;
                    // Do not modify the whole-range norm16 value.
                    int norm16_2=norm16;
                    if (isDecompNoAlgorithmic(norm16_2)) {
                        // Maps to an isCompYesAndZeroCC.
                        c2 = mapAlgorithmic(c2, norm16_2);
                        norm16_2 = getNorm16(c2);
                        // No compatibility mappings for the CanonicalIterator.
                        assert(!(isHangulLV(norm16_2) || isHangulLVT(norm16_2)));
                    }
                    if (norm16_2 > minYesNo) {
                        // c decomposes, get everything from the variable-length extra data
                        int mapping=norm16_2>>OFFSET_SHIFT;
                        int firstUnit=extraData.charAt(mapping);
                        int length=firstUnit&MAPPING_LENGTH_MASK;
                        if((firstUnit&MAPPING_HAS_CCC_LCCC_WORD)!=0) {
                            // The unit before the mapping holds the cc in its low byte.
                            if(c==c2 && (extraData.charAt(mapping-1)&0xff)!=0) {
                                newValue|=CANON_NOT_SEGMENT_STARTER;  // original c has cc!=0
                            }
                        }
                        // Skip empty mappings (no characters in the decomposition).
                        if(length!=0) {
                            ++mapping;  // skip over the firstUnit
                            // add c to first code point's start set
                            int limit=mapping+length;
                            c2=extraData.codePointAt(mapping);
                            addToStartSet(newData, c, c2);
                            // Set CANON_NOT_SEGMENT_STARTER for each remaining code point of a
                            // one-way mapping. A 2-way mapping is possible here after
                            // intermediate algorithmic mapping.
                            if(norm16_2>=minNoNo) {
                                while((mapping+=Character.charCount(c2))<limit) {
                                    c2=extraData.codePointAt(mapping);
                                    int c2Value=newData.get(c2);
                                    if((c2Value&CANON_NOT_SEGMENT_STARTER)==0) {
                                        newData.set(c2, c2Value|CANON_NOT_SEGMENT_STARTER);
                                    }
                                }
                            }
                        }
                    } else {
                        // c decomposed to c2 algorithmically; c has cc==0
                        addToStartSet(newData, c, c2);
                    }
                }
                // Write back only when a bit was actually added for c.
                if(newValue!=oldValue) {
                    newData.set(c, newValue);
                }
            }
        }
        canonIterData=newData.toTrie2_32();
    }
    return this;
}

/** Returns the value stored in normTrie for code point c. */
public int getNorm16(int c) { return normTrie.get(c); }

/**
 * Maps a norm16 value to a composition quick-check result.
 * @param norm16 a value from the normTrie
 * @return 1 (yes) below minNoNo or from MIN_YES_YES_WITH_CC upward,
 *     2 (maybe) from minMaybeYes up to that point, 0 (no) otherwise
 */
public int getCompQuickCheck(int norm16) {
    if(norm16<minNoNo || MIN_YES_YES_WITH_CC<=norm16) {
        return 1;  // yes
    } else if(minMaybeYes<=norm16) {
        return 2;  // maybe
    } else {
        return 0;  // no
    }
}
/** Returns true if limitNoNo<=norm16<minMaybeYes. */
public boolean isAlgorithmicNoNo(int norm16) { return limitNoNo<=norm16 && norm16<minMaybeYes; }
/** Returns true if minNoNo<=norm16<minMaybeYes. */
public boolean isCompNo(int norm16) { return minNoNo<=norm16 && norm16<minMaybeYes; }
/** Returns true if norm16 is below minYesNo or at least minMaybeYes. */
public boolean isDecompYes(int norm16) { return norm16<minYesNo || minMaybeYes<=norm16; }

/**
 * Returns the combining class encoded by a norm16 value.
 * @param norm16 a value from the normTrie
 * @return the cc; 0 for values below minNoNo and for values in
 *     [limitNoNo, MIN_NORMAL_MAYBE_YES)
 */
public int getCC(int norm16) {
    if(norm16>=MIN_NORMAL_MAYBE_YES) {
        return getCCFromNormalYesOrMaybe(norm16);
    }
    if(norm16<minNoNo || limitNoNo<=norm16) {
        return 0;
    }
    return getCCFromNoNo(norm16);
}
/** Extracts the cc byte from a norm16 value; the caller ensures norm16>=MIN_NORMAL_MAYBE_YES. */
public static int getCCFromNormalYesOrMaybe(int norm16) {
    return (norm16 >> OFFSET_SHIFT) & 0xff;
}
/** Returns the cc byte for norm16>=MIN_NORMAL_MAYBE_YES, otherwise 0. */
public static int getCCFromYesOrMaybe(int norm16) {
    return norm16>=MIN_NORMAL_MAYBE_YES ? getCCFromNormalYesOrMaybe(norm16) : 0;
}
/** Like getCCFromYesOrMaybe(getNorm16(c)), but skips the trie lookup below minCompNoMaybeCP. */
public int getCCFromYesOrMaybeCP(int c) {
    if (c < minCompNoMaybeCP) { return 0; }
    return getCCFromYesOrMaybe(getNorm16(c));
}

/**
 * Returns the FCD data for code point c.
 * Returns 0 without a trie lookup for c below minDecompNoCP, and for c up to 0xffff
 * when the smallFCD table rules out non-zero data.
 * @param c A Unicode code point.
 * @return The lccc(c) in bits 15..8 and tccc(c) in bits 7..0.
 */
public int getFCD16(int c) {
    if(c<minDecompNoCP) {
        return 0;
    } else if(c<=0xffff) {
        if(!singleLeadMightHaveNonZeroFCD16(c)) { return 0; }
    }
    return getFCD16FromNormData(c);
}
/** Returns true if the single-or-lead code unit c might have non-zero FCD data. */
public boolean singleLeadMightHaveNonZeroFCD16(int lead) {
    // 0<=lead<=0xffff
    // One smallFCD byte per 256 code units; each of its bits covers 32 of them.
    byte bits=smallFCD[lead>>8];
    if(bits==0) { return false; }
    return ((bits>>((lead>>5)&7))&1)!=0;
}

/**
 * Gets the FCD value from the regular normalization data.
 * Unlike getFCD16(), this always performs the normTrie lookup.
 * @param c A Unicode code point.
 * @return lccc in bits 15..8 and tccc in bits 7..0
 */
public int getFCD16FromNormData(int c) {
    int norm16=getNorm16(c);
    if (norm16 >= limitNoNo) {
        if(norm16>=MIN_NORMAL_MAYBE_YES) {
            // combining mark
            // lccc and tccc are both the character's own cc.
            norm16=getCCFromNormalYesOrMaybe(norm16);
            return norm16|(norm16<<8);
        } else if(norm16>=minMaybeYes) {
            return 0;
        } else {  // isDecompNoAlgorithmic(norm16)
            int deltaTrailCC = norm16 & DELTA_TCCC_MASK;
            if (deltaTrailCC <= DELTA_TCCC_1) {
                // NOTE(review): presumably yields tccc 0 or 1 with lccc 0 - confirm
                // against the DELTA_TCCC_* definitions.
                return deltaTrailCC >> OFFSET_SHIFT;
            }
            // Maps to an isCompYesAndZeroCC.
            c=mapAlgorithmic(c, norm16);
            norm16=getNorm16(c);
        }
    }
    if(norm16<=minYesNo || isHangulLVT(norm16)) {
        // no decomposition or Hangul syllable, all zeros
        return 0;
    }
    // c decomposes, get everything from the variable-length extra data
    int mapping=norm16>>OFFSET_SHIFT;
    int firstUnit=extraData.charAt(mapping);
    int fcd16=firstUnit>>8;  // tccc
    if((firstUnit&MAPPING_HAS_CCC_LCCC_WORD)!=0) {
        fcd16|=extraData.charAt(mapping-1)&0xff00;  // lccc
    }
    return fcd16;
}

/**
 * Gets the decomposition for one code point.
7542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param c code point 7552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return c's decomposition, if it has one; returns null if it does not have a decomposition 7562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 7572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public String getDecomposition(int c) { 7582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int norm16; 75905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if(c<minDecompNoCP || isMaybeOrNonZeroCC(norm16=getNorm16(c))) { 76005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // c does not decompose 76105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return null; 76205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 76305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert int decomp = -1; 76405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if(isDecompNoAlgorithmic(norm16)) { 76505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // Maps to an isCompYesAndZeroCC. 76605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert decomp=c=mapAlgorithmic(c, norm16); 76705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // The mapping might decompose further. 
76805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert norm16 = getNorm16(c); 76905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 77005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if (norm16 < minYesNo) { 7712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(decomp<0) { 7722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return null; 7732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 7742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return UTF16.valueOf(decomp); 7752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 77605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } else if(isHangulLV(norm16) || isHangulLVT(norm16)) { 77705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // Hangul syllable: decompose algorithmically 77805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert StringBuilder buffer=new StringBuilder(); 77905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert Hangul.decompose(c, buffer); 78005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return buffer.toString(); 7812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 78205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // c decomposes, get everything from the variable-length extra data 78305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert int mapping=norm16>>OFFSET_SHIFT; 78405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert int length=extraData.charAt(mapping++)&MAPPING_LENGTH_MASK; 78505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return extraData.substring(mapping, mapping+length); 7862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 7882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 7892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Gets the raw decomposition for one code point. 
7902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param c code point 7912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return c's raw decomposition, if it has one; returns null if it does not have a decomposition 7922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 7932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public String getRawDecomposition(int c) { 7942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int norm16; 7952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(c<minDecompNoCP || isDecompYes(norm16=getNorm16(c))) { 7962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // c does not decompose 7972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return null; 79805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } else if(isHangulLV(norm16) || isHangulLVT(norm16)) { 7992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Hangul syllable: decompose algorithmically 8002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller StringBuilder buffer=new StringBuilder(); 8012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller Hangul.getRawDecomposition(c, buffer); 8022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return buffer.toString(); 8032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(isDecompNoAlgorithmic(norm16)) { 8042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return UTF16.valueOf(mapAlgorithmic(c, norm16)); 80505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 80605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // c decomposes, get everything from the variable-length extra data 80705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert int mapping=norm16>>OFFSET_SHIFT; 80805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert int firstUnit=extraData.charAt(mapping); 80905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert int mLength=firstUnit&MAPPING_LENGTH_MASK; // length of normal mapping 81005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 
if((firstUnit&MAPPING_HAS_RAW_MAPPING)!=0) { 81105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // Read the raw mapping from before the firstUnit and before the optional ccc/lccc word. 81205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // Bit 7=MAPPING_HAS_CCC_LCCC_WORD 81305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert int rawMapping=mapping-((firstUnit>>7)&1)-1; 81405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert char rm0=extraData.charAt(rawMapping); 81505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if(rm0<=MAPPING_LENGTH_MASK) { 81605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return extraData.substring(rawMapping-rm0, rawMapping); 8172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 81805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // Copy the normal mapping and replace its first two code units with rm0. 81905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert StringBuilder buffer=new StringBuilder(mLength-1).append(rm0); 82005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert mapping+=1+2; // skip over the firstUnit and the first two mapping code units 82105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return buffer.append(extraData, mapping, mapping+mLength-2).toString(); 8222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 82305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } else { 82405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert mapping+=1; // skip over the firstUnit 82505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return extraData.substring(mapping, mapping+mLength); 8262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 8292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 8302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Returns true if code point c starts a canonical-iterator string segment. 
8312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <b>{@link #ensureCanonIterData()} must have been called before this method, 8322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * or else this method will crash.</b> 8332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param c A Unicode code point. 8342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return true if c starts a canonical-iterator string segment. 8352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 8362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public boolean isCanonSegmentStarter(int c) { 8372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return canonIterData.get(c)>=0; 8382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 8402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Returns true if there are characters whose decomposition starts with c. 8412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * If so, then the set is cleared and then filled with those characters. 8422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <b>{@link #ensureCanonIterData()} must have been called before this method, 8432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * or else this method will crash.</b> 8442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param c A Unicode code point. 8452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param set A UnicodeSet to receive the characters whose decompositions 8462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * start with c, if there are any. 8472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return true if there are characters whose decomposition starts with c. 
8482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 8492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public boolean getCanonStartSet(int c, UnicodeSet set) { 8502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int canonValue=canonIterData.get(c)&~CANON_NOT_SEGMENT_STARTER; 8512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(canonValue==0) { 8522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return false; 8532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller set.clear(); 8552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int value=canonValue&CANON_VALUE_MASK; 8562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if((canonValue&CANON_HAS_SET)!=0) { 8572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller set.addAll(canonStartSets.get(value)); 8582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(value!=0) { 8592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller set.add(value); 8602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if((canonValue&CANON_HAS_COMPOSITIONS)!=0) { 8622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int norm16=getNorm16(c); 8632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(norm16==JAMO_L) { 8642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int syllable=Hangul.HANGUL_BASE+(c-Hangul.JAMO_L_BASE)*Hangul.JAMO_VT_COUNT; 8652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller set.add(syllable, syllable+Hangul.JAMO_VT_COUNT-1); 8662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 8672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller addComposites(getCompositionsList(norm16), set); 8682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return true; 8712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 
// Fixed norm16 values.
    public static final int MIN_YES_YES_WITH_CC=0xfe02;
    public static final int JAMO_VT=0xfe00;
    // norm16 values at or above this threshold carry their cc inline;
    // getCCFromNormalYesOrMaybe() extracts it with (norm16>>OFFSET_SHIFT)&0xff.
    public static final int MIN_NORMAL_MAYBE_YES=0xfc00;
    public static final int JAMO_L=2;  // offset=1 hasCompBoundaryAfter=FALSE
    public static final int INERT=1;  // offset=0 hasCompBoundaryAfter=TRUE

    // norm16 bit 0 is comp-boundary-after.
    public static final int HAS_COMP_BOUNDARY_AFTER=1;
    // Shifting norm16 right by this amount drops bit 0; the result is used
    // as the index into extraData or as the source of the inline cc bits.
    public static final int OFFSET_SHIFT=1;

    // For algorithmic one-way mappings, norm16 bits 2..1 indicate the
    // tccc (0, 1, >1) for quick FCC boundary-after tests.
    public static final int DELTA_TCCC_0=0;
    public static final int DELTA_TCCC_1=2;
    public static final int DELTA_TCCC_GT_1=4;
    public static final int DELTA_TCCC_MASK=6;
    public static final int DELTA_SHIFT=3;

    public static final int MAX_DELTA=0x40;

    // Byte offsets from the start of the data, after the generic header.
    public static final int IX_NORM_TRIE_OFFSET=0;
    public static final int IX_EXTRA_DATA_OFFSET=1;
    public static final int IX_SMALL_FCD_OFFSET=2;
    public static final int IX_RESERVED3_OFFSET=3;
    public static final int IX_TOTAL_SIZE=7;

    // Code point thresholds for quick check codes.
    public static final int IX_MIN_DECOMP_NO_CP=8;
    public static final int IX_MIN_COMP_NO_MAYBE_CP=9;

    // Norm16 value thresholds for quick check combinations and types of extra data.

    /** Mappings & compositions in [minYesNo..minYesNoMappingsOnly[. */
    public static final int IX_MIN_YES_NO=10;
    /** Mappings are comp-normalized. */
    public static final int IX_MIN_NO_NO=11;
    public static final int IX_LIMIT_NO_NO=12;
    public static final int IX_MIN_MAYBE_YES=13;

    /** Mappings only in [minYesNoMappingsOnly..minNoNo[. */
    public static final int IX_MIN_YES_NO_MAPPINGS_ONLY=14;
    /** Mappings are not comp-normalized but have a comp boundary before. */
    public static final int IX_MIN_NO_NO_COMP_BOUNDARY_BEFORE=15;
    /** Mappings do not have a comp boundary before. */
    public static final int IX_MIN_NO_NO_COMP_NO_MAYBE_CC=16;
    /** Mappings to the empty string. */
    public static final int IX_MIN_NO_NO_EMPTY=17;

    public static final int IX_MIN_LCCC_CP=18;
    public static final int IX_COUNT=20;

    // Bits of the first unit of a mapping in extraData.
    // Set when the unit before the first unit holds lccc in its high byte.
    public static final int MAPPING_HAS_CCC_LCCC_WORD=0x80;
    // Set when a separate raw mapping is stored before the first unit
    // (and before the optional ccc/lccc word).
    public static final int MAPPING_HAS_RAW_MAPPING=0x40;
    // unused bit 0x20;
    // Low five bits: length of the mapping that follows the first unit.
    public static final int MAPPING_LENGTH_MASK=0x1f;

    public static final int COMP_1_LAST_TUPLE=0x8000;
    public static final int COMP_1_TRIPLE=1;
    public static final int COMP_1_TRAIL_LIMIT=0x3400;
    public static final int COMP_1_TRAIL_MASK=0x7ffe;
    public static final int COMP_1_TRAIL_SHIFT=9;  // 10-1 for the "triple" bit
    public static final int COMP_2_TRAIL_SHIFT=6;
    public static final int COMP_2_TRAIL_MASK=0xffc0;

    // higher-level functionality ------------------------------------------ ***

    // NFD without an NFD Normalizer2 instance.
/**
     * Decomposes all of s into dest, replacing dest's previous contents,
     * and returns dest.
     *
     * @param s the input text
     * @param dest receives the decomposed text; it is cleared first
     * @return dest
     */
    public Appendable decompose(CharSequence s, StringBuilder dest) {
        decompose(s, 0, s.length(), dest, s.length());
        return dest;
    }
    /**
     * Decomposes s[src, limit[ and writes the result to dest.
     * dest is emptied (setLength(0)) before anything is written.
     *
     * @param s the input text
     * @param src start index into s (inclusive)
     * @param limit end index into s (exclusive)
     * @param dest receives the decomposed text
     * @param destLengthEstimate capacity hint passed to the ReorderingBuffer;
     *        if negative, limit-src is used instead
     */
    public void decompose(CharSequence s, int src, int limit, StringBuilder dest,
                   int destLengthEstimate) {
        if(destLengthEstimate<0) {
            destLengthEstimate=limit-src;
        }
        dest.setLength(0);
        ReorderingBuffer buffer=new ReorderingBuffer(this, dest, destLengthEstimate);
        decompose(s, src, limit, buffer);
    }

    // Dual functionality:
    // buffer!=null: normalize s[src, limit[ into buffer; returns limit.
    // buffer==null: isNormalized/quickCheck/spanQuickCheckYes; returns the index of the
    //               last boundary seen before the first code point that either is not
    //               "decomp yes" or has its cc out of order, or limit if there is none.
    public int decompose(CharSequence s, int src, int limit,
                         ReorderingBuffer buffer) {
        int minNoCP=minDecompNoCP;

        int prevSrc;
        int c=0;
        int norm16=0;

        // only for quick check
        int prevBoundary=src;
        int prevCC=0;

        for(;;) {
            // count code units below the minimum or with irrelevant data for the quick check
            for(prevSrc=src; src!=limit;) {
                if( (c=s.charAt(src))<minNoCP ||
                    isMostDecompYesAndZeroCC(norm16=normTrie.getFromU16SingleLead((char)c))
                ) {
                    ++src;
                } else if(!UTF16.isSurrogate((char)c)) {
                    break;
                } else {
                    // Surrogate code unit: try to assemble the supplementary code point
                    // and look up its own norm16.
                    char c2;
                    if(UTF16Plus.isSurrogateLead(c)) {
                        if((src+1)!=limit && Character.isLowSurrogate(c2=s.charAt(src+1))) {
                            c=Character.toCodePoint((char)c, c2);
                        }
                    } else /* trail surrogate */ {
                        // Only look back within the run scanned in this pass (prevSrc<src);
                        // src is moved back onto the lead surrogate.
                        if(prevSrc<src && Character.isHighSurrogate(c2=s.charAt(src-1))) {
                            --src;
                            c=Character.toCodePoint(c2, (char)c);
                        }
                    }
                    if(isMostDecompYesAndZeroCC(norm16=getNorm16(c))) {
                        src+=Character.charCount(c);
                    } else {
                        break;
                    }
                }
            }
            // copy these code units all at once
            if(src!=prevSrc) {
                if(buffer!=null) {
                    buffer.flushAndAppendZeroCC(s, prevSrc, src);
                } else {
                    // quick check: the skipped run ends at a boundary with cc==0
                    prevCC=0;
                    prevBoundary=src;
                }
            }
            if(src==limit) {
                break;
            }

            // Check one above-minimum, relevant code point.
            // c and norm16 still hold the code point that stopped the scan above.
            src+=Character.charCount(c);
            if(buffer!=null) {
                decompose(c, norm16, buffer);
            } else {
                if(isDecompYes(norm16)) {
                    int cc=getCCFromYesOrMaybe(norm16);
                    if(prevCC<=cc || cc==0) {
                        prevCC=cc;
                        if(cc<=1) {
                            prevBoundary=src;
                        }
                        continue;
                    }
                }
                return prevBoundary;  // "no" or cc out of order
            }
        }
        return src;
    }
    /**
     * Appends s to buffer, either decomposing it or merging it as-is.
     * An empty s is a no-op.
     *
     * @param s the text to append
     * @param doDecompose if true, s is decomposed into buffer; if false, s is appended
     *        unchanged, with the leading run of cc!=0 code points passed to
     *        buffer.append() together with the first and last cc of that run
     * @param buffer the destination buffer
     */
    public void decomposeAndAppend(CharSequence s, boolean doDecompose, ReorderingBuffer buffer) {
        int limit=s.length();
        if(limit==0) {
            return;
        }
        if(doDecompose) {
            decompose(s, 0, limit, buffer);
            return;
        }
        // Just merge the strings at the boundary.
        int c=Character.codePointAt(s, 0);
        int src=0;
        int firstCC, prevCC, cc;
        firstCC=prevCC=cc=getCC(getNorm16(c));
        // Advance src past the leading code points whose cc is non-zero;
        // prevCC ends up as the cc of the last such code point.
        while(cc!=0) {
            prevCC=cc;
            src+=Character.charCount(c);
            if(src>=limit) {
                break;
            }
            c=Character.codePointAt(s, src);
            cc=getCC(getNorm16(c));
        };
        // NOTE(review): presumably append(s, start, limit, leadCC, trailCC) reorders the
        // cc!=0 prefix against the buffer's current tail -- confirm in ReorderingBuffer.
        buffer.append(s, 0, src, firstCC, prevCC);
        buffer.append(s, src, limit);
    }

    // Very similar to composeQuickCheck(): Make the same changes in both places if relevant.
// Composes the text s[src, limit[ or, in check-only mode, tests whether composing
// it would change anything.
//
// doCompose: normalize
// !doCompose: isNormalized (buffer must be empty and initialized)
//
// s, src, limit:  the text and the half-open index range [src, limit[ to process.
// onlyContiguous: forwarded to the boundary tests, decomposeShort() and recompose();
//                 the inline comments below label the true case "FCC".
// doCompose:      true to write the composed text to the buffer; false to only check.
// buffer:         receives the output when doCompose. When !doCompose it is used only
//                 as scratch space by the slow path and emptied again with remove().
//
// Returns true when the end of the range is reached. Every "return false" below is
// guarded by !doCompose, so false is only returned in check-only mode, at the first
// place where the text would have to change.
public boolean compose(CharSequence s, int src, int limit,
                       boolean onlyContiguous,
                       boolean doCompose,
                       ReorderingBuffer buffer) {
    // Start of the text that has been scanned but not yet appended to the buffer.
    int prevBoundary=src;
    int minNoMaybeCP=minCompNoMaybeCP;

    for (;;) {
        // Fast path: Scan over a sequence of characters below the minimum "no or maybe" code point,
        // or with (compYes && ccc==0) properties.
        int prevSrc;
        int c = 0;
        int norm16 = 0;
        for (;;) {
            if (src == limit) {
                // Nothing left to examine: flush the pending unchanged text and finish.
                if (prevBoundary != limit && doCompose) {
                    buffer.append(s, prevBoundary, limit);
                }
                return true;
            }
            if( (c=s.charAt(src))<minNoMaybeCP ||
                isCompYesAndZeroCC(norm16=normTrie.getFromU16SingleLead((char)c))
            ) {
                ++src;
            } else {
                // prevSrc marks the start of the current character, src the position after it.
                prevSrc = src++;
                if(!UTF16.isSurrogate((char)c)) {
                    break;
                } else {
                    // c is a surrogate code unit: try to pair it with its neighbor
                    // and look up the data for the resulting supplementary code point.
                    // If no partner is found, c stays the single surrogate.
                    char c2;
                    if(UTF16Plus.isSurrogateLead(c)) {
                        if(src!=limit && Character.isLowSurrogate(c2=s.charAt(src))) {
                            ++src;
                            c=Character.toCodePoint((char)c, c2);
                        }
                    } else /* trail surrogate */ {
                        // Only look back as far as prevBoundary.
                        if(prevBoundary<prevSrc && Character.isHighSurrogate(c2=s.charAt(prevSrc-1))) {
                            --prevSrc;
                            c=Character.toCodePoint(c2, (char)c);
                        }
                    }
                    if(!isCompYesAndZeroCC(norm16=getNorm16(c))) {
                        break;
                    }
                    // Otherwise this character also passes; keep scanning.
                }
            }
        }
        // isCompYesAndZeroCC(norm16) is false, that is, norm16>=minNoNo.
        // The current character is either a "noNo" (has a mapping)
        // or a "maybeYes" (combines backward)
        // or a "yesYes" with ccc!=0.
        // It is not a Hangul syllable or Jamo L because those have "yes" properties.

        // Medium-fast path: Handle cases that do not require full decomposition and recomposition.
        if (!isMaybeOrNonZeroCC(norm16)) {  // minNoNo <= norm16 < minMaybeYes
            // Every branch below would change the text, so a pure check fails right away.
            if (!doCompose) {
                return false;
            }
            // Fast path for mapping a character that is immediately surrounded by boundaries.
            // In this case, we need not decompose around the current character.
            if (isDecompNoAlgorithmic(norm16)) {
                // Maps to a single isCompYesAndZeroCC character
                // which also implies hasCompBoundaryBefore.
                if (norm16HasCompBoundaryAfter(norm16, onlyContiguous) ||
                        hasCompBoundaryBefore(s, src, limit)) {
                    if (prevBoundary != prevSrc) {
                        buffer.append(s, prevBoundary, prevSrc);
                    }
                    buffer.append(mapAlgorithmic(c, norm16), 0);
                    prevBoundary = src;
                    continue;
                }
            } else if (norm16 < minNoNoCompBoundaryBefore) {
                // The mapping is comp-normalized which also implies hasCompBoundaryBefore.
                if (norm16HasCompBoundaryAfter(norm16, onlyContiguous) ||
                        hasCompBoundaryBefore(s, src, limit)) {
                    if (prevBoundary != prevSrc) {
                        buffer.append(s, prevBoundary, prevSrc);
                    }
                    // The first unit at the mapping offset carries the mapping length
                    // in its MAPPING_LENGTH_MASK bits; the mapping text follows it.
                    int mapping = norm16 >> OFFSET_SHIFT;
                    int length = extraData.charAt(mapping++) & MAPPING_LENGTH_MASK;
                    buffer.append(extraData, mapping, mapping + length);
                    prevBoundary = src;
                    continue;
                }
            } else if (norm16 >= minNoNoEmpty) {
                // The current character maps to nothing.
                // Simply omit it from the output if there is a boundary before _or_ after it.
                // The character itself implies no boundaries.
                if (hasCompBoundaryBefore(s, src, limit) ||
                        hasCompBoundaryAfter(s, prevBoundary, prevSrc, onlyContiguous)) {
                    if (prevBoundary != prevSrc) {
                        buffer.append(s, prevBoundary, prevSrc);
                    }
                    prevBoundary = src;
                    continue;
                }
            }
            // Other "noNo" type, or need to examine more text around this character:
            // Fall through to the slow path.
        } else if (isJamoVT(norm16) && prevBoundary != prevSrc) {
            // There is at least one pending code unit before the Jamo V/T to combine with.
            char prev=s.charAt(prevSrc-1);
            if(c<Hangul.JAMO_T_BASE) {
                // The current character is a Jamo Vowel,
                // compose with previous Jamo L and following Jamo T.
                // char is unsigned: a prev below JAMO_L_BASE wraps to a large value
                // and fails the range test below.
                char l = (char)(prev-Hangul.JAMO_L_BASE);
                if(l<Hangul.JAMO_L_COUNT) {
                    if (!doCompose) {
                        return false;
                    }
                    // t: offset of a following Jamo T, 0 for "no T", -1 for "cannot tell here".
                    int t;
                    if (src != limit &&
                            0 < (t = (s.charAt(src) - Hangul.JAMO_T_BASE)) &&
                            t < Hangul.JAMO_T_COUNT) {
                        // The next character is a Jamo T.
                        ++src;
                    } else if (hasCompBoundaryBefore(s, src, limit)) {
                        // No Jamo T follows, not even via decomposition.
                        t = 0;
                    } else {
                        t = -1;
                    }
                    if (t >= 0) {
                        int syllable = Hangul.HANGUL_BASE +
                            (l*Hangul.JAMO_V_COUNT + (c-Hangul.JAMO_V_BASE)) *
                            Hangul.JAMO_T_COUNT + t;
                        --prevSrc;  // Replace the Jamo L as well.
                        if (prevBoundary != prevSrc) {
                            buffer.append(s, prevBoundary, prevSrc);
                        }
                        buffer.append((char)syllable);
                        prevBoundary = src;
                        continue;
                    }
                    // If we see L+V+x where x!=T then we drop to the slow path,
                    // decompose and recompose.
                    // This is to deal with NFKC finding normal L and V but a
                    // compatibility variant of a T.
                    // We need to either fully compose that combination here
                    // (which would complicate the code and may not work with strange custom data)
                    // or use the slow path.
                }
            } else if (Hangul.isHangulLV(prev)) {
                // The current character is a Jamo Trailing consonant,
                // compose with previous Hangul LV that does not contain a Jamo T.
                if (!doCompose) {
                    return false;
                }
                int syllable = prev + c - Hangul.JAMO_T_BASE;
                --prevSrc;  // Replace the Hangul LV as well.
                if (prevBoundary != prevSrc) {
                    buffer.append(s, prevBoundary, prevSrc);
                }
                buffer.append((char)syllable);
                prevBoundary = src;
                continue;
            }
            // No matching context, or may need to decompose surrounding text first:
            // Fall through to the slow path.
        } else if (norm16 > JAMO_VT) {  // norm16 >= MIN_YES_YES_WITH_CC
            // One or more combining marks that do not combine-back:
            // Check for canonical order, copy unchanged if ok and
            // if followed by a character with a boundary-before.
            int cc = getCCFromNormalYesOrMaybe(norm16);  // cc!=0
            if (onlyContiguous /* FCC */ && getPreviousTrailCC(s, prevBoundary, prevSrc) > cc) {
                // Fails FCD test, need to decompose and contiguously recompose.
                if (!doCompose) {
                    return false;
                }
            } else {
                // If !onlyContiguous (not FCC), then we ignore the tccc of
                // the previous character which passed the quick check "yes && ccc==0" test.
                int n16;
                for (;;) {
                    if (src == limit) {
                        // The range ends inside an in-order run of marks: nothing to change.
                        if (doCompose) {
                            buffer.append(s, prevBoundary, limit);
                        }
                        return true;
                    }
                    int prevCC = cc;
                    c = Character.codePointAt(s, src);
                    n16 = normTrie.get(c);
                    if (n16 >= MIN_YES_YES_WITH_CC) {
                        cc = getCCFromNormalYesOrMaybe(n16);
                        if (prevCC > cc) {
                            // Out of canonical order: a pure check fails,
                            // otherwise leave the loop with src at the offending mark.
                            if (!doCompose) {
                                return false;
                            }
                            break;
                        }
                    } else {
                        break;
                    }
                    src += Character.charCount(c);
                }
                // src is after the last in-order combining mark.
                // If there is a boundary here, then we continue with no change.
                if (norm16HasCompBoundaryBefore(n16)) {
                    // c (at src) can be skipped right away if it passes the fast-path test.
                    if (isCompYesAndZeroCC(n16)) {
                        src += Character.charCount(c);
                    }
                    continue;
                }
                // Use the slow path. There is no boundary in [prevSrc, src[.
            }
        }

        // Slow path: Find the nearest boundaries around the current character,
        // decompose and recompose.
        if (prevBoundary != prevSrc && !norm16HasCompBoundaryBefore(norm16)) {
            // Pull the preceding code point into the segment unless there is a boundary after it.
            c = Character.codePointBefore(s, prevSrc);
            norm16 = normTrie.get(c);
            if (!norm16HasCompBoundaryAfter(norm16, onlyContiguous)) {
                prevSrc -= Character.charCount(c);
            }
        }
        if (doCompose && prevBoundary != prevSrc) {
            buffer.append(s, prevBoundary, prevSrc);
        }
        int recomposeStartIndex=buffer.length();
        // We know there is not a boundary here.
        decomposeShort(s, prevSrc, src, false /* !stopAtCompBoundary */, onlyContiguous,
                       buffer);
        // Decompose until the next boundary.
        src = decomposeShort(s, src, limit, true /* stopAtCompBoundary */, onlyContiguous,
                             buffer);
        recompose(buffer, recomposeStartIndex, onlyContiguous);
        if(!doCompose) {
            // Check-only mode: the buffer holds just the recomposed segment.
            // Compare it with the source segment, then discard it.
            if(!buffer.equals(s, prevSrc, src)) {
                return false;
            }
            buffer.remove();
        }
        prevBoundary=src;
    }
}

/**
* Very similar to compose(): Make the same changes in both places if relevant.
* doSpan: spanQuickCheckYes (ignore bit 0 of the return value)
* !doSpan: quickCheck
*
* This method takes no output buffer; it only scans s[src, limit[ and reports
* the result through the return value.
*
* @param s the text to check
* @param src start index in s (inclusive)
* @param limit end index in s (exclusive)
* @param onlyContiguous forwarded to the boundary test; when true ("FCC" in the
*        comments below) the trail combining class of the preceding character
*        is also compared with the current combining mark
* @param doSpan if true, return at the first "maybe" character with the boundary
*        before it; if false, record the "maybe" in bit 0 and keep scanning
* @return bits 31..1: spanQuickCheckYes (==s.length() if "yes") and
*         bit 0: set if "maybe"; otherwise, if the span length<s.length()
*         then the quick check result is "no"
*/
public int composeQuickCheck(CharSequence s, int src, int limit,
                             boolean onlyContiguous, boolean doSpan) {
    // Becomes 1 once a "maybe" character has been seen (only when !doSpan).
    int qcResult=0;
    int prevBoundary=src;
    int minNoMaybeCP=minCompNoMaybeCP;

    for(;;) {
        // Fast path: Scan over a sequence of characters below the minimum "no or maybe" code point,
        // or with (compYes && ccc==0) properties.
        int prevSrc;
        int c = 0;
        int norm16 = 0;
        for (;;) {
            if(src==limit) {
                return (src<<1)|qcResult;  // "yes" or "maybe"
            }
            if( (c=s.charAt(src))<minNoMaybeCP ||
                isCompYesAndZeroCC(norm16=normTrie.getFromU16SingleLead((char)c))
            ) {
                ++src;
            } else {
                // prevSrc marks the start of the current character, src the position after it.
                prevSrc = src++;
                if(!UTF16.isSurrogate((char)c)) {
                    break;
                } else {
                    // c is a surrogate code unit: try to pair it with its neighbor
                    // and look up the data for the resulting supplementary code point.
                    char c2;
                    if(UTF16Plus.isSurrogateLead(c)) {
                        if(src!=limit && Character.isLowSurrogate(c2=s.charAt(src))) {
                            ++src;
                            c=Character.toCodePoint((char)c, c2);
                        }
                    } else /* trail surrogate */ {
                        if(prevBoundary<prevSrc && Character.isHighSurrogate(c2=s.charAt(prevSrc-1))) {
                            --prevSrc;
                            c=Character.toCodePoint(c2, (char)c);
                        }
                    }
                    if(!isCompYesAndZeroCC(norm16=getNorm16(c))) {
                        break;
                    }
                }
            }
        }
        // isCompYesAndZeroCC(norm16) is false, that is, norm16>=minNoNo.
        // The current character is either a "noNo" (has a mapping)
        // or a "maybeYes" (combines backward)
        // or a "yesYes" with ccc!=0.
        // It is not a Hangul syllable or Jamo L because those have "yes" properties.

        // Move prevBoundary up to the current character, or to the code point before it
        // when there is no boundary between the two. In the latter case prevNorm16
        // remembers that code point's data for the FCC test below.
        int prevNorm16 = INERT;
        if (prevBoundary != prevSrc) {
            prevBoundary = prevSrc;
            if (!norm16HasCompBoundaryBefore(norm16)) {
                c = Character.codePointBefore(s, prevSrc);
                int n16 = getNorm16(c);
                if (!norm16HasCompBoundaryAfter(n16, onlyContiguous)) {
                    prevBoundary -= Character.charCount(c);
                    prevNorm16 = n16;
                }
            }
        }

        if(isMaybeOrNonZeroCC(norm16)) {
            int cc=getCCFromYesOrMaybe(norm16);
            if (onlyContiguous /* FCC */ && cc != 0 &&
                    getTrailCCFromCompYesAndZeroCC(prevNorm16) > cc) {
                // The [prevBoundary..prevSrc[ character
                // passed the quick check "yes && ccc==0" test
                // but is out of canonical order with the current combining mark.
                // Nothing to do here: control reaches the "no" return at the bottom.
            } else {
                // If !onlyContiguous (not FCC), then we ignore the tccc of
                // the previous character which passed the quick check "yes && ccc==0" test.
                for (;;) {
                    if (norm16 < MIN_YES_YES_WITH_CC) {
                        if (!doSpan) {
                            qcResult = 1;
                        } else {
                            return prevBoundary << 1;  // spanYes does not care to know it's "maybe"
                        }
                    }
                    if (src == limit) {
                        return (src<<1) | qcResult;  // "yes" or "maybe"
                    }
                    int prevCC = cc;
                    c = Character.codePointAt(s, src);
                    norm16 = getNorm16(c);
                    if (isMaybeOrNonZeroCC(norm16)) {
                        cc = getCCFromYesOrMaybe(norm16);
                        // Stop at a nonzero cc that is lower than the previous one.
                        if (!(prevCC <= cc || cc == 0)) {
                            break;
                        }
                    } else {
                        break;
                    }
                    src += Character.charCount(c);
                }
                // src is after the last in-order combining mark.
                if (isCompYesAndZeroCC(norm16)) {
                    // The character at src passes the fast-path test: step over it and resume scanning.
                    prevBoundary = src;
                    src += Character.charCount(c);
                    continue;
                }
            }
        }
        return prevBoundary<<1;  // "no"
    }
}
// Appends s to the buffer.
// If the buffer is not empty and s does not start at a composition boundary, then the
// end of the buffer (from its last boundary) and the start of s (up to its first
// boundary) are joined in a temporary StringBuilder and composed together; this
// junction is always composed, regardless of doCompose.
// The rest of s is composed if doCompose, otherwise appended unchanged.
public void composeAndAppend(CharSequence s,
                             boolean doCompose,
                             boolean onlyContiguous,
                             ReorderingBuffer buffer) {
    int src=0, limit=s.length();
    if(!buffer.isEmpty()) {
        int firstStarterInSrc=findNextCompBoundary(s, 0, limit, onlyContiguous);
        if(0!=firstStarterInSrc) {
            int lastStarterInDest=findPreviousCompBoundary(buffer.getStringBuilder(),
                                                           buffer.length(), onlyContiguous);
            // Capacity: both pieces plus 16 extra chars of headroom.
            StringBuilder middle=new StringBuilder((buffer.length()-lastStarterInDest)+
                                                   firstStarterInSrc+16);
            middle.append(buffer.getStringBuilder(), lastStarterInDest, buffer.length());
            // Take the copied tail out of the buffer; compose() below writes its replacement.
            buffer.removeSuffix(buffer.length()-lastStarterInDest);
            middle.append(s, 0, firstStarterInSrc);
            compose(middle, 0, middle.length(), onlyContiguous, true, buffer);
            src=firstStarterInSrc;
        }
    }
    if(doCompose) {
        compose(s, src, limit, onlyContiguous, true, buffer);
    } else {
        buffer.append(s, src, limit);
    }
}
// Dual functionality:
// buffer!=NULL: normalize
// buffer==NULL: isNormalized/quickCheck/spanQuickCheckYes
public int makeFCD(CharSequence s, int src, int limit, ReorderingBuffer buffer) {
    // Note: In this function we use buffer->appendZeroCC() because we track
    // the lead and trail combining classes here, rather than leaving it to
14512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // the ReorderingBuffer. 14522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // The exception is the call to decomposeShort() which uses the buffer 14532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // in the normal way. 14542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 14552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Tracks the last FCD-safe boundary, before lccc=0 or after properly-ordered tccc<=1. 14562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Similar to the prevBoundary in the compose() implementation. 14572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int prevBoundary=src; 14582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int prevSrc; 14592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int c=0; 14602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int prevFCD16=0; 14612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int fcd16=0; 14622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 14632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for(;;) { 14642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // count code units with lccc==0 14652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for(prevSrc=src; src!=limit;) { 146605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if((c=s.charAt(src))<minLcccCP) { 14672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller prevFCD16=~c; 14682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ++src; 14692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(!singleLeadMightHaveNonZeroFCD16(c)) { 14702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller prevFCD16=0; 14712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ++src; 14722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 14732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(UTF16.isSurrogate((char)c)) { 14742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller char c2; 14752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 
if(UTF16Plus.isSurrogateLead(c)) { 14762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if((src+1)!=limit && Character.isLowSurrogate(c2=s.charAt(src+1))) { 14772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c=Character.toCodePoint((char)c, c2); 14782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 14792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else /* trail surrogate */ { 14802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(prevSrc<src && Character.isHighSurrogate(c2=s.charAt(src-1))) { 14812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller --src; 14822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c=Character.toCodePoint(c2, (char)c); 14832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 14842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 14852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 14862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if((fcd16=getFCD16FromNormData(c))<=0xff) { 14872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller prevFCD16=fcd16; 14882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller src+=Character.charCount(c); 14892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 14902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 14912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 14922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 14932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 14942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // copy these code units all at once 14952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(src!=prevSrc) { 14962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(src==limit) { 14972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(buffer!=null) { 14982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer.flushAndAppendZeroCC(s, prevSrc, src); 14992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 15002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 
15012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 15022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller prevBoundary=src; 15032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // We know that the previous character's lccc==0. 15042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(prevFCD16<0) { 150505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // Fetching the fcd16 value was deferred for this below-minLcccCP code point. 15062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int prev=~prevFCD16; 150705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if(prev<minDecompNoCP) { 150805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert prevFCD16=0; 150905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } else { 151005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert prevFCD16=getFCD16FromNormData(prev); 151105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if(prevFCD16>1) { 151205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert --prevBoundary; 151305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 15142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 15152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 15162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int p=src-1; 15172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if( Character.isLowSurrogate(s.charAt(p)) && prevSrc<p && 15182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller Character.isHighSurrogate(s.charAt(p-1)) 15192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ) { 15202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller --p; 15212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Need to fetch the previous character's FCD value because 15222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // prevFCD16 was just for the trail surrogate code point. 
15232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller prevFCD16=getFCD16FromNormData(Character.toCodePoint(s.charAt(p), s.charAt(p+1))); 15242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Still known to have lccc==0 because its lead surrogate unit had lccc==0. 15252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 15262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(prevFCD16>1) { 15272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller prevBoundary=p; 15282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 15292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 15302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(buffer!=null) { 15312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // The last lccc==0 character is excluded from the 15322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // flush-and-append call in case it needs to be modified. 15332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer.flushAndAppendZeroCC(s, prevSrc, prevBoundary); 15342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer.append(s, prevBoundary, src); 15352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 15362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // The start of the current character (c). 15372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller prevSrc=src; 15382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(src==limit) { 15392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 15402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 15412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 15422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller src+=Character.charCount(c); 15432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // The current character (c) at [prevSrc..src[ has a non-zero lead combining class. 15442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Check for proper order, and decompose locally if necessary. 
15452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if((prevFCD16&0xff)<=(fcd16>>8)) { 15462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // proper order: prev tccc <= current lccc 15472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if((fcd16&0xff)<=1) { 15482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller prevBoundary=src; 15492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 15502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(buffer!=null) { 15512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer.appendZeroCC(c); 15522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 15532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller prevFCD16=fcd16; 15542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller continue; 15552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(buffer==null) { 15562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return prevBoundary; // quick check "no" 15572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 15582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* 15592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Back out the part of the source that we copied or appended 15602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * already but is now going to be decomposed. 15612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * prevSrc is set to after what was copied/appended. 15622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 15632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer.removeSuffix(prevSrc-prevBoundary); 15642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* 15652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Find the part of the source that needs to be decomposed, 15662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * up to the next safe boundary. 
15672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 15682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller src=findNextFCDBoundary(s, src, limit); 15692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* 15702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * The source text does not fulfill the conditions for FCD. 15712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Decompose and reorder a limited piece of the text. 15722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 157305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert decomposeShort(s, prevBoundary, src, false, false, buffer); 15742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller prevBoundary=src; 15752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller prevFCD16=0; 15762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 15772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 15782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return src; 15792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 15802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public void makeFCDAndAppend(CharSequence s, boolean doMakeFCD, ReorderingBuffer buffer) { 15812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int src=0, limit=s.length(); 15822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(!buffer.isEmpty()) { 15832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int firstBoundaryInSrc=findNextFCDBoundary(s, 0, limit); 15842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(0!=firstBoundaryInSrc) { 15852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int lastBoundaryInDest=findPreviousFCDBoundary(buffer.getStringBuilder(), 15862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer.length()); 15872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller StringBuilder middle=new StringBuilder((buffer.length()-lastBoundaryInDest)+ 15882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller firstBoundaryInSrc+16); 15892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 
middle.append(buffer.getStringBuilder(), lastBoundaryInDest, buffer.length()); 15902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer.removeSuffix(buffer.length()-lastBoundaryInDest); 15912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller middle.append(s, 0, firstBoundaryInSrc); 15922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller makeFCD(middle, 0, middle.length(), buffer); 15932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller src=firstBoundaryInSrc; 15942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 15952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 15962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(doMakeFCD) { 15972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller makeFCD(s, src, limit, buffer); 15982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 15992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer.append(s, src, limit); 16002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 16012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 16022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 160305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert public boolean hasDecompBoundaryBefore(int c) { 160405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return c < minLcccCP || (c <= 0xffff && !singleLeadMightHaveNonZeroFCD16(c)) || 160505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert norm16HasDecompBoundaryBefore(getNorm16(c)); 160605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 160705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert public boolean norm16HasDecompBoundaryBefore(int norm16) { 160805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if (norm16 < minNoNoCompNoMaybeCC) { 160905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return true; 161005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 161105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if (norm16 >= limitNoNo) { 161205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return norm16 <= 
MIN_NORMAL_MAYBE_YES || norm16 == JAMO_VT; 161305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 161405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // c decomposes, get everything from the variable-length extra data 161505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert int mapping=norm16>>OFFSET_SHIFT; 161605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert int firstUnit=extraData.charAt(mapping); 161705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // true if leadCC==0 (hasFCDBoundaryBefore()) 161805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return (firstUnit&MAPPING_HAS_CCC_LCCC_WORD)==0 || (extraData.charAt(mapping-1)&0xff00)==0; 161905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 162005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert public boolean hasDecompBoundaryAfter(int c) { 162105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if (c < minDecompNoCP) { 162205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return true; 162305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 162405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if (c <= 0xffff && !singleLeadMightHaveNonZeroFCD16(c)) { 162505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return true; 162605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 162705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return norm16HasDecompBoundaryAfter(getNorm16(c)); 162805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 162905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert public boolean norm16HasDecompBoundaryAfter(int norm16) { 163005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if(norm16 <= minYesNo || isHangulLVT(norm16)) { 163105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return true; 163205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 163305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if (norm16 >= limitNoNo) { 
163405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if (isMaybeOrNonZeroCC(norm16)) { 163505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return norm16 <= MIN_NORMAL_MAYBE_YES || norm16 == JAMO_VT; 16362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 163705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // Maps to an isCompYesAndZeroCC. 163805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return (norm16 & DELTA_TCCC_MASK) <= DELTA_TCCC_1; 163905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 164005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // c decomposes, get everything from the variable-length extra data 164105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert int mapping=norm16>>OFFSET_SHIFT; 164205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert int firstUnit=extraData.charAt(mapping); 164305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // decomp after-boundary: same as hasFCDBoundaryAfter(), 164405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // fcd16<=1 || trailCC==0 164505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if(firstUnit>0x1ff) { 164605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return false; // trailCC>1 164705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 164805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if(firstUnit<=0xff) { 164905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return true; // trailCC==0 16502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 165105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // if(trailCC==1) test leadCC==0, same as checking for before-boundary 165205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // true if leadCC==0 (hasFCDBoundaryBefore()) 165305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return (firstUnit&MAPPING_HAS_CCC_LCCC_WORD)==0 || (extraData.charAt(mapping-1)&0xff00)==0; 16542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 
16552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public boolean isDecompInert(int c) { return isDecompYesAndZeroCC(getNorm16(c)); } 16562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 16572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public boolean hasCompBoundaryBefore(int c) { 165805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return c<minCompNoMaybeCP || norm16HasCompBoundaryBefore(getNorm16(c)); 16592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 166005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert public boolean hasCompBoundaryAfter(int c, boolean onlyContiguous) { 166105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return norm16HasCompBoundaryAfter(getNorm16(c), onlyContiguous); 16622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 166305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert public boolean isCompInert(int c, boolean onlyContiguous) { 166405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert int norm16=getNorm16(c); 166505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return isCompYesAndZeroCC(norm16) && 166605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert (norm16 & HAS_COMP_BOUNDARY_AFTER) != 0 && 166705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert (!onlyContiguous || isInert(norm16) || extraData.charAt(norm16>>OFFSET_SHIFT) <= 0x1ff); 16682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 166905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 167005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert public boolean hasFCDBoundaryBefore(int c) { return hasDecompBoundaryBefore(c); } 167105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert public boolean hasFCDBoundaryAfter(int c) { return hasDecompBoundaryAfter(c); } 16722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public boolean isFCDInert(int c) { return getFCD16(c)<=1; } 16732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 16742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private boolean 
isMaybe(int norm16) { return minMaybeYes<=norm16 && norm16<=JAMO_VT; } 16752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private boolean isMaybeOrNonZeroCC(int norm16) { return norm16>=minMaybeYes; } 167605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert private static boolean isInert(int norm16) { return norm16==INERT; } 167705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert private static boolean isJamoL(int norm16) { return norm16==JAMO_L; } 16782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static boolean isJamoVT(int norm16) { return norm16==JAMO_VT; } 167905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert private int hangulLVT() { return minYesNoMappingsOnly|HAS_COMP_BOUNDARY_AFTER; } 168005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert private boolean isHangulLV(int norm16) { return norm16==minYesNo; } 168105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert private boolean isHangulLVT(int norm16) { 168205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return norm16==hangulLVT(); 168305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 16842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private boolean isCompYesAndZeroCC(int norm16) { return norm16<minNoNo; } 16852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // UBool isCompYes(uint16_t norm16) const { 16862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // return norm16>=MIN_YES_YES_WITH_CC || norm16<minNoNo; 16872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // } 16882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // UBool isCompYesOrMaybe(uint16_t norm16) const { 16892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // return norm16<minNoNo || minMaybeYes<=norm16; 16902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // } 16912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // private boolean hasZeroCCFromDecompYes(int norm16) { 16922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // return norm16<=MIN_NORMAL_MAYBE_YES 
|| norm16==JAMO_VT; 16932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // } 16942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private boolean isDecompYesAndZeroCC(int norm16) { 16952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return norm16<minYesNo || 16962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller norm16==JAMO_VT || 16972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller (minMaybeYes<=norm16 && norm16<=MIN_NORMAL_MAYBE_YES); 16982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 16992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 17002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * A little faster and simpler than isDecompYesAndZeroCC() but does not include 17012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * the MaybeYes which combine-forward and have ccc=0. 170205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * (Standard Unicode 10 normalization does not have such characters.) 17032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 17042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private boolean isMostDecompYesAndZeroCC(int norm16) { 17052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return norm16<minYesNo || norm16==MIN_NORMAL_MAYBE_YES || norm16==JAMO_VT; 17062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 17072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private boolean isDecompNoAlgorithmic(int norm16) { return norm16>=limitNoNo; } 17082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 17092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // For use with isCompYes(). 17102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Perhaps the compiler can combine the two tests for MIN_YES_YES_WITH_CC. 17112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // static uint8_t getCCFromYes(uint16_t norm16) { 171205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // return norm16>=MIN_YES_YES_WITH_CC ? 
getCCFromNormalYesOrMaybe(norm16) : 0; 17132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // } 17142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int getCCFromNoNo(int norm16) { 171505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert int mapping=norm16>>OFFSET_SHIFT; 171605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if((extraData.charAt(mapping)&MAPPING_HAS_CCC_LCCC_WORD)!=0) { 171705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return extraData.charAt(mapping-1)&0xff; 17182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 17192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return 0; 17202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 17212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 172205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert int getTrailCCFromCompYesAndZeroCC(int norm16) { 172305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if(norm16<=minYesNo) { 172405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return 0; // yesYes and Hangul LV have ccc=tccc=0 17252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 172605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // For Hangul LVT we harmlessly fetch a firstUnit with tccc=0 here. 172705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return extraData.charAt(norm16>>OFFSET_SHIFT)>>8; // tccc from yesNo 17282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 17292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 17302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 17312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Requires algorithmic-NoNo. 
17322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int mapAlgorithmic(int c, int norm16) { 173305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return c+(norm16>>DELTA_SHIFT)-centerNoNoDelta; 17342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 17352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 17362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Requires minYesNo<norm16<limitNoNo. 173705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // private int getMapping(int norm16) { return extraData+(norm16>>OFFSET_SHIFT); } 17382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 17392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 17402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return index into maybeYesCompositions, or -1 17412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 17422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int getCompositionsListForDecompYes(int norm16) { 174305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if(norm16<JAMO_L || MIN_NORMAL_MAYBE_YES<=norm16) { 17442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return -1; 17452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 17462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if((norm16-=minMaybeYes)<0) { 17472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // norm16<minMaybeYes: index into extraData which is a substring at 17482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // maybeYesCompositions[MIN_NORMAL_MAYBE_YES-minMaybeYes] 17492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // same as (MIN_NORMAL_MAYBE_YES-minMaybeYes)+norm16 17502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller norm16+=MIN_NORMAL_MAYBE_YES; // for yesYes; if Jamo L: harmless empty list 17512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 175205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return norm16>>OFFSET_SHIFT; 17532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 
17542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 17552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 17562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return index into maybeYesCompositions 17572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 17582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int getCompositionsListForComposite(int norm16) { 175905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // A composite has both mapping & compositions list. 176005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert int list=((MIN_NORMAL_MAYBE_YES-minMaybeYes)+norm16)>>OFFSET_SHIFT; 176105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert int firstUnit=maybeYesCompositions.charAt(list); 176205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return list+ // mapping in maybeYesCompositions 176305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 1+ // +1 to skip the first unit with the mapping length 17642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller (firstUnit&MAPPING_LENGTH_MASK); // + mapping length 17652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 176605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert private int getCompositionsListForMaybe(int norm16) { 176705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // minMaybeYes<=norm16<MIN_NORMAL_MAYBE_YES 176805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return (norm16-minMaybeYes)>>OFFSET_SHIFT; 176905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 17702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 17712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param c code point must have compositions 17722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return index into maybeYesCompositions 17732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 17742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int getCompositionsList(int norm16) { 17752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return 
isDecompYes(norm16) ? 17762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller getCompositionsListForDecompYes(norm16) : 17772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller getCompositionsListForComposite(norm16); 17782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 17792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 17802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Decompose a short piece of text which is likely to contain characters that 17812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // fail the quick check loop and/or where the quick check loop's overhead 17822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // is unlikely to be amortized. 17832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Called by the compose() and makeFCD() implementations. 17842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Public in Java for collation implementation code. 178505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert private int decomposeShort( 178605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert CharSequence s, int src, int limit, 178705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert boolean stopAtCompBoundary, boolean onlyContiguous, 178805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert ReorderingBuffer buffer) { 17892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while(src<limit) { 17902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int c=Character.codePointAt(s, src); 179105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if (stopAtCompBoundary && c < minCompNoMaybeCP) { 179205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return src; 179305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 179405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert int norm16 = getNorm16(c); 179505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if (stopAtCompBoundary && norm16HasCompBoundaryBefore(norm16)) { 179605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return src; 
179705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 17982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller src+=Character.charCount(c); 179905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert decompose(c, norm16, buffer); 180005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if (stopAtCompBoundary && norm16HasCompBoundaryAfter(norm16, onlyContiguous)) { 180105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return src; 180205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 18032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 180405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return src; 18052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 180605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert private void decompose(int c, int norm16, ReorderingBuffer buffer) { 180705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // get the decomposition and the lead and trail cc's 180805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if (norm16 >= limitNoNo) { 180905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if (isMaybeOrNonZeroCC(norm16)) { 18102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer.append(c, getCCFromYesOrMaybe(norm16)); 181105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return; 181205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 181305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // Maps to an isCompYesAndZeroCC. 
181405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert c=mapAlgorithmic(c, norm16); 181505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert norm16=getNorm16(c); 181605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 181705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if (norm16 < minYesNo) { 181805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // c does not decompose 181905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert buffer.append(c, 0); 182005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } else if(isHangulLV(norm16) || isHangulLVT(norm16)) { 182105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // Hangul syllable: decompose algorithmically 182205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert Hangul.decompose(c, buffer); 182305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } else { 182405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // c decomposes, get everything from the variable-length extra data 182505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert int mapping=norm16>>OFFSET_SHIFT; 182605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert int firstUnit=extraData.charAt(mapping); 182705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert int length=firstUnit&MAPPING_LENGTH_MASK; 182805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert int leadCC, trailCC; 182905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert trailCC=firstUnit>>8; 183005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if((firstUnit&MAPPING_HAS_CCC_LCCC_WORD)!=0) { 183105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert leadCC=extraData.charAt(mapping-1)>>8; 18322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 183305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert leadCC=0; 18342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 183505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert ++mapping; // skip over the firstUnit 183605fa7802d0874812c234a29745586677ee5837eaFredrik 
Roubert buffer.append(extraData, mapping, mapping+length, leadCC, trailCC); 18372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 18382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 18392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 18402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 18412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Finds the recomposition result for 18422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * a forward-combining "lead" character, 18432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * specified with a pointer to its compositions list, 18442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * and a backward-combining "trail" character. 18452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 18462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <p>If the lead and trail characters combine, then this function returns 18472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * the following "compositeAndFwd" value: 18482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <pre> 18492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Bits 21..1 composite character 18502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Bit 0 set if the composite is a forward-combining starter 18512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * </pre> 18522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * otherwise it returns -1. 18532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 18542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <p>The compositions list has (trail, compositeAndFwd) pair entries, 18552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * encoded as either pairs or triples of 16-bit units. 18562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * The last entry has the high bit of its first unit set. 
18572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 18582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <p>The list is sorted by ascending trail characters (there are no duplicates). 18592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * A linear search is used. 18602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 18612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <p>See normalizer2impl.h for a more detailed description 18622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * of the compositions list format. 18632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 18642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static int combine(String compositions, int list, int trail) { 18652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int key1, firstUnit; 18662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(trail<COMP_1_TRAIL_LIMIT) { 18672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // trail character is 0..33FF 18682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // result entry may have 2 or 3 units 18692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller key1=(trail<<1); 18702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while(key1>(firstUnit=compositions.charAt(list))) { 18712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller list+=2+(firstUnit&COMP_1_TRIPLE); 18722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 18732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(key1==(firstUnit&COMP_1_TRAIL_MASK)) { 18742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if((firstUnit&COMP_1_TRIPLE)!=0) { 1875f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert return (compositions.charAt(list+1)<<16)|compositions.charAt(list+2); 18762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 18772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return compositions.charAt(list+1); 18782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 18792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 
18802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 18812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // trail character is 3400..10FFFF 18822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // result entry has 3 units 18832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller key1=COMP_1_TRAIL_LIMIT+(((trail>>COMP_1_TRAIL_SHIFT))&~COMP_1_TRIPLE); 18842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int key2=(trail<<COMP_2_TRAIL_SHIFT)&0xffff; 18852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int secondUnit; 18862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for(;;) { 18872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(key1>(firstUnit=compositions.charAt(list))) { 18882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller list+=2+(firstUnit&COMP_1_TRIPLE); 18892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(key1==(firstUnit&COMP_1_TRAIL_MASK)) { 18902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(key2>(secondUnit=compositions.charAt(list+1))) { 18912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if((firstUnit&COMP_1_LAST_TUPLE)!=0) { 18922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 18932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 18942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller list+=3; 18952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 18962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(key2==(secondUnit&COMP_2_TRAIL_MASK)) { 18972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return ((secondUnit&~COMP_2_TRAIL_MASK)<<16)|compositions.charAt(list+2); 18982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 18992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 19002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 19012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 19022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 19032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 
19042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 19052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 19062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return -1; 19072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 19082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 19092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param list some character's compositions list 19102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param set recursively receives the composites from these compositions 19112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 19122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private void addComposites(int list, UnicodeSet set) { 19132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int firstUnit, compositeAndFwd; 19142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller do { 19152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller firstUnit=maybeYesCompositions.charAt(list); 19162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if((firstUnit&COMP_1_TRIPLE)==0) { 19172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller compositeAndFwd=maybeYesCompositions.charAt(list+1); 19182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller list+=2; 19192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 1920f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert compositeAndFwd=((maybeYesCompositions.charAt(list+1)&~COMP_2_TRAIL_MASK)<<16)| 19212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller maybeYesCompositions.charAt(list+2); 19222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller list+=3; 19232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 19242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int composite=compositeAndFwd>>1; 19252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if((compositeAndFwd&1)!=0) { 19262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller addComposites(getCompositionsListForComposite(getNorm16(composite)), set); 19272ae130017183d2f66d55bf0ca51f8da3294644fdNeil 
Fuller } 19282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller set.add(composite); 19292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } while((firstUnit&COMP_1_LAST_TUPLE)==0); 19302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 19312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* 19322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Recomposes the buffer text starting at recomposeStartIndex 19332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * (which is in NFD - decomposed and canonically ordered), 19342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * and truncates the buffer contents. 19352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 19362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Note that recomposition never lengthens the text: 19372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Any character consists of either one or two code units; 19382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * a composition may contain at most one more code unit than the original starter, 19392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * while the combining mark that is removed has at least one code unit. 
19402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 19412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private void recompose(ReorderingBuffer buffer, int recomposeStartIndex, 19422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller boolean onlyContiguous) { 19432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller StringBuilder sb=buffer.getStringBuilder(); 19442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int p=recomposeStartIndex; 19452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(p==sb.length()) { 19462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return; 19472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 19482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 19492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int starter, pRemove; 19502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int compositionsList; 19512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int c, compositeAndFwd; 19522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int norm16; 19532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int cc, prevCC; 19542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller boolean starterIsSupplementary; 19552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 19562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Some of the following variables are not used until we have a forward-combining starter 19572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // and are only initialized now to avoid compiler warnings. 
19582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller compositionsList=-1; // used as indicator for whether we have a forward-combining starter 19592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller starter=-1; 19602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller starterIsSupplementary=false; 19612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller prevCC=0; 19622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 19632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for(;;) { 19642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c=sb.codePointAt(p); 19652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller p+=Character.charCount(c); 19662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller norm16=getNorm16(c); 19672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller cc=getCCFromYesOrMaybe(norm16); 19682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if( // this character combines backward and 19692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller isMaybe(norm16) && 19702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // we have seen a starter that combines forward and 19712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller compositionsList>=0 && 19722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // the backward-combining character is not blocked 19732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller (prevCC<cc || prevCC==0) 19742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ) { 19752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(isJamoVT(norm16)) { 19762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // c is a Jamo V/T, see if we can compose it with the previous character. 19772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(c<Hangul.JAMO_T_BASE) { 19782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // c is a Jamo Vowel, compose with previous Jamo L and following Jamo T. 
19792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller char prev=(char)(sb.charAt(starter)-Hangul.JAMO_L_BASE); 19802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(prev<Hangul.JAMO_L_COUNT) { 19812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller pRemove=p-1; 19822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller char syllable=(char) 19832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller (Hangul.HANGUL_BASE+ 19842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller (prev*Hangul.JAMO_V_COUNT+(c-Hangul.JAMO_V_BASE))* 19852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller Hangul.JAMO_T_COUNT); 19862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller char t; 19872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(p!=sb.length() && (t=(char)(sb.charAt(p)-Hangul.JAMO_T_BASE))<Hangul.JAMO_T_COUNT) { 19882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ++p; 19892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller syllable+=t; // The next character was a Jamo T. 19902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 19912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller sb.setCharAt(starter, syllable); 19922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // remove the Jamo V/T 19932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller sb.delete(pRemove, p); 19942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller p=pRemove; 19952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 19962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 19972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* 19982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * No "else" for Jamo T: 19992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Since the input is in NFD, there are no Hangul LV syllables that 20002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * a Jamo T could combine with. 20012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * All Jamo Ts are combined above when handling Jamo Vs. 
20022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 20032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(p==sb.length()) { 20042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 20052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 20062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller compositionsList=-1; 20072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller continue; 20082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if((compositeAndFwd=combine(maybeYesCompositions, compositionsList, c))>=0) { 20092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // The starter and the combining mark (c) do combine. 20102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int composite=compositeAndFwd>>1; 20112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 20122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Remove the combining mark. 20132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller pRemove=p-Character.charCount(c); // pRemove & p: start & limit of the combining mark 20142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller sb.delete(pRemove, p); 20152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller p=pRemove; 20162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Replace the starter with the composite. 
20172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(starterIsSupplementary) { 20182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(composite>0xffff) { 20192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // both are supplementary 20202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller sb.setCharAt(starter, UTF16.getLeadSurrogate(composite)); 20212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller sb.setCharAt(starter+1, UTF16.getTrailSurrogate(composite)); 20222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 20232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller sb.setCharAt(starter, (char)c); 20242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller sb.deleteCharAt(starter+1); 20252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // The composite is shorter than the starter, 20262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // move the intermediate characters forward one. 20272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller starterIsSupplementary=false; 20282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller --p; 20292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 20302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(composite>0xffff) { 20312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // The composite is longer than the starter, 20322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // move the intermediate characters back one. 
20332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller starterIsSupplementary=true; 20342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller sb.setCharAt(starter, UTF16.getLeadSurrogate(composite)); 20352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller sb.insert(starter+1, UTF16.getTrailSurrogate(composite)); 20362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ++p; 20372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 20382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // both are on the BMP 20392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller sb.setCharAt(starter, (char)composite); 20402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 20412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 20422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Keep prevCC because we removed the combining mark. 20432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 20442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(p==sb.length()) { 20452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 20462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 20472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Is the composite a starter that combines forward? 20482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if((compositeAndFwd&1)!=0) { 20492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller compositionsList= 20502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller getCompositionsListForComposite(getNorm16(composite)); 20512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 20522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller compositionsList=-1; 20532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 20542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 20552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // We combined; continue with looking for compositions. 
20562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller continue; 20572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 20582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 20592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 20602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // no combination this time 20612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller prevCC=cc; 20622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(p==sb.length()) { 20632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 20642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 20652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 20662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // If c did not combine, then check if it is a starter. 20672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(cc==0) { 20682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Found a new starter. 20692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if((compositionsList=getCompositionsListForDecompYes(norm16))>=0) { 20702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // It may combine with something, prepare for it. 20712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(c<=0xffff) { 20722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller starterIsSupplementary=false; 20732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller starter=p-1; 20742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 20752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller starterIsSupplementary=true; 20762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller starter=p-2; 20772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 20782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 20792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(onlyContiguous) { 20802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // FCC: no discontiguous compositions; any intervening character blocks. 
20812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller compositionsList=-1; 20822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 20832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 20842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer.flush(); 20852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 20862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 20872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public int composePair(int a, int b) { 20882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int norm16=getNorm16(a); // maps an out-of-range 'a' to inert norm16=0 20892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int list; 20902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(isInert(norm16)) { 20912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return -1; 20922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(norm16<minYesNoMappingsOnly) { 209305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // a combines forward. 20942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(isJamoL(norm16)) { 20952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller b-=Hangul.JAMO_V_BASE; 20962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(0<=b && b<Hangul.JAMO_V_COUNT) { 20972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return 20982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller (Hangul.HANGUL_BASE+ 20992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ((a-Hangul.JAMO_L_BASE)*Hangul.JAMO_V_COUNT+b)* 21002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller Hangul.JAMO_T_COUNT); 21012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 21022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return -1; 21032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 210405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } else if(isHangulLV(norm16)) { 21052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller b-=Hangul.JAMO_T_BASE; 210605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if(0<b && 
b<Hangul.JAMO_T_COUNT) { // not b==0! 21072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return a+b; 21082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 21092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return -1; 21102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 21112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 21122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 'a' has a compositions list in extraData 211305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert list=((MIN_NORMAL_MAYBE_YES-minMaybeYes)+norm16)>>OFFSET_SHIFT; 21142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(norm16>minYesNo) { // composite 'a' has both mapping & compositions list 21152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller list+= // mapping pointer 211605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 1+ // +1 to skip the first unit with the mapping length 211705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert (maybeYesCompositions.charAt(list)&MAPPING_LENGTH_MASK); // + mapping length 21182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 21192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 21202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if(norm16<minMaybeYes || MIN_NORMAL_MAYBE_YES<=norm16) { 21212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return -1; 21222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 212305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert list=getCompositionsListForMaybe(norm16); // offset into maybeYesCompositions 21242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 21252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(b<0 || 0x10ffff<b) { // combine(list, b) requires a valid code point b 21262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return -1; 21272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 21282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return combine(maybeYesCompositions, list, b)>>1; 
21292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 21302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 21312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 21322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Does c have a composition boundary before it? 21332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * True if its decomposition begins with a character that has 21342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * ccc=0 && NFC_QC=Yes (isCompYesAndZeroCC()). 21352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * As a shortcut, this is true if c itself has ccc=0 && NFC_QC=Yes 21362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * (isCompYesAndZeroCC()) so we need not decompose. 21372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 21382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private boolean hasCompBoundaryBefore(int c, int norm16) { 213905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return c<minCompNoMaybeCP || norm16HasCompBoundaryBefore(norm16); 214005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 214105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert private boolean norm16HasCompBoundaryBefore(int norm16) { 214205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return norm16 < minNoNoCompNoMaybeCC || isAlgorithmicNoNo(norm16); 214305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 214405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert private boolean hasCompBoundaryBefore(CharSequence s, int src, int limit) { 214505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return src == limit || hasCompBoundaryBefore(Character.codePointAt(s, src)); 21462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 214705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert private boolean norm16HasCompBoundaryAfter(int norm16, boolean onlyContiguous) { 214805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return (norm16 & HAS_COMP_BOUNDARY_AFTER) != 0 && 
214905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert (!onlyContiguous || isTrailCC01ForCompBoundaryAfter(norm16)); 215005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 215105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert private boolean hasCompBoundaryAfter(CharSequence s, int start, int p, boolean onlyContiguous) { 215205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return start == p || hasCompBoundaryAfter(Character.codePointBefore(s, p), onlyContiguous); 215305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 215405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert /** For FCC: Given norm16 HAS_COMP_BOUNDARY_AFTER, does it have tccc<=1? */ 215505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert private boolean isTrailCC01ForCompBoundaryAfter(int norm16) { 215605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return isInert(norm16) || (isDecompNoAlgorithmic(norm16) ? 215705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert (norm16 & DELTA_TCCC_MASK) <= DELTA_TCCC_1 : extraData.charAt(norm16 >> OFFSET_SHIFT) <= 0x1ff); 215805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 215905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 216005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert private int findPreviousCompBoundary(CharSequence s, int p, boolean onlyContiguous) { 21612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while(p>0) { 21622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int c=Character.codePointBefore(s, p); 216305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert int norm16 = getNorm16(c); 216405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if (norm16HasCompBoundaryAfter(norm16, onlyContiguous)) { 216505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert break; 216605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 21672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller p-=Character.charCount(c); 216805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 
if(hasCompBoundaryBefore(c, norm16)) { 21692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 21702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 21712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 21722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return p; 21732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 217405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert private int findNextCompBoundary(CharSequence s, int p, int limit, boolean onlyContiguous) { 21752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while(p<limit) { 21762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int c=Character.codePointAt(s, p); 21772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int norm16=normTrie.get(c); 21782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(hasCompBoundaryBefore(c, norm16)) { 21792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 21802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 21812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller p+=Character.charCount(c); 218205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if (norm16HasCompBoundaryAfter(norm16, onlyContiguous)) { 218305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert break; 218405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 21852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 21862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return p; 21872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 21882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 21892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int findPreviousFCDBoundary(CharSequence s, int p) { 21902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while(p>0) { 21912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int c=Character.codePointBefore(s, p); 219205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert int norm16; 219305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if (c < minDecompNoCP || 
norm16HasDecompBoundaryAfter(norm16 = getNorm16(c))) { 219405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert break; 219505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 21962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller p-=Character.charCount(c); 219705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if (norm16HasDecompBoundaryBefore(norm16)) { 21982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 21992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 22002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 22012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return p; 22022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 22032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int findNextFCDBoundary(CharSequence s, int p, int limit) { 22042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while(p<limit) { 22052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int c=Character.codePointAt(s, p); 220605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert int norm16; 220705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if (c < minLcccCP || norm16HasDecompBoundaryBefore(norm16 = getNorm16(c))) { 22082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 22092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 22102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller p+=Character.charCount(c); 221105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if (norm16HasDecompBoundaryAfter(norm16)) { 221205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert break; 221305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 22142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 22152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return p; 22162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 22172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 221805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert private int getPreviousTrailCC(CharSequence s, int start, int p) { 
221905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert if (start == p) { 222005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return 0; 222105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 222205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert return getFCD16(Character.codePointBefore(s, p)); 222305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } 222405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 22252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private void addToStartSet(Trie2Writable newData, int origin, int decompLead) { 22262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int canonValue=newData.get(decompLead); 22272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if((canonValue&(CANON_HAS_SET|CANON_VALUE_MASK))==0 && origin!=0) { 22282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // origin is the first character whose decomposition starts with 22292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // the character for which we are setting the value. 22302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller newData.set(decompLead, canonValue|origin); 22312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 22322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // origin is not the first character, or it is U+0000. 
22332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller UnicodeSet set; 22342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if((canonValue&CANON_HAS_SET)==0) { 22352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int firstOrigin=canonValue&CANON_VALUE_MASK; 22362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller canonValue=(canonValue&~CANON_VALUE_MASK)|CANON_HAS_SET|canonStartSets.size(); 22372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller newData.set(decompLead, canonValue); 22382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller canonStartSets.add(set=new UnicodeSet()); 22392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(firstOrigin!=0) { 22402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller set.add(firstOrigin); 22412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 22422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 22432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller set=canonStartSets.get(canonValue&CANON_VALUE_MASK); 22442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 22452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller set.add(origin); 22462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 22472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 22482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 22492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller @SuppressWarnings("unused") 22502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private VersionInfo dataVersion; 22512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 225205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // BMP code point thresholds for quick check loops looking at single UTF-16 code units. 
22532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int minDecompNoCP; 22542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int minCompNoMaybeCP; 225505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert private int minLcccCP; 22562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 22572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Norm16 value thresholds for quick check combinations and types of extra data. 22582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int minYesNo; 22592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int minYesNoMappingsOnly; 22602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int minNoNo; 226105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert private int minNoNoCompBoundaryBefore; 226205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert private int minNoNoCompNoMaybeCC; 226305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert private int minNoNoEmpty; 22642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int limitNoNo; 226505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert private int centerNoNoDelta; 22662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int minMaybeYes; 22672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 22682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private Trie2_16 normTrie; 22692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private String maybeYesCompositions; 22702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private String extraData; // mappings and/or compositions for yesYes, yesNo & noNo characters 22712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private byte[] smallFCD; // [0x100] one bit per 32 BMP code points, set if any FCD!=0 22722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 22732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private Trie2_32 canonIterData; 22742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private ArrayList<UnicodeSet> canonStartSets; 
22752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 22762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // bits in canonIterData 22772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static final int CANON_NOT_SEGMENT_STARTER = 0x80000000; 22782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static final int CANON_HAS_COMPOSITIONS = 0x40000000; 22792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static final int CANON_HAS_SET = 0x200000; 22802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static final int CANON_VALUE_MASK = 0x1fffff; 22812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller} 2282