12ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller/* GENERATED SOURCE. DO NOT MODIFY. */ 2f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert// © 2016 and later: Unicode, Inc. and others. 3f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert// License & terms of use: http://www.unicode.org/copyright.html#License 42ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller/* 52ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ******************************************************************************* 6bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * Copyright (C) 1996-2016, International Business Machines Corporation and * 72ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * others. All Rights Reserved. * 82ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ******************************************************************************* 92ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerpackage android.icu.text; 112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller/** 132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* A compression engine implementing the Standard Compression Scheme 142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* for Unicode (SCSU) as outlined in <A 152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* HREF="http://www.unicode.org/unicode/reports/tr6">Unicode Technical 162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* Report #6</A>. 172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* 182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* <P>The SCSU works by using dynamically positioned <EM>windows</EM> 192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* consisting of 128 consecutive characters in Unicode. 
During compression,
* characters within a window are encoded in the compressed stream as the bytes
* <TT>0x80 - 0xFF</TT>.  The SCSU provides transparency for the characters
* (bytes) between <TT>U+0000 - U+00FF</TT>.  The SCSU approximates the
* storage size of traditional character sets, for example 1 byte per
* character for ASCII or Latin-1 text, and 2 bytes per character for CJK
* ideographs.</P>
*
* <P><STRONG>USAGE</STRONG></P>
*
* <P>The static methods on <TT>UnicodeCompressor</TT> may be used in a
* straightforward manner to compress simple strings:</P>
*
* <PRE>
*  String s = ... ; // get string from somewhere
*  byte [] compressed = UnicodeCompressor.compress(s);
* </PRE>
*
* <P>The static methods have a fairly large memory footprint.
382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* For finer-grained control over memory usage, 392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* <TT>UnicodeCompressor</TT> offers more powerful APIs allowing 402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* iterative compression:</P> 412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* 422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* <PRE> 432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* // Compress an array "chars" of length "len" using a buffer of 512 bytes 442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* // to the OutputStream "out" 452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* 462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* UnicodeCompressor myCompressor = new UnicodeCompressor(); 472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* final static int BUFSIZE = 512; 482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* byte [] byteBuffer = new byte [ BUFSIZE ]; 492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* int bytesWritten = 0; 502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* int [] unicharsRead = new int [1]; 512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* int totalCharsCompressed = 0; 522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* int totalBytesWritten = 0; 532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* 542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* do { 552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* // do the compression 562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* bytesWritten = myCompressor.compress(chars, totalCharsCompressed, 572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* len, unicharsRead, 582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* byteBuffer, 0, BUFSIZE); 592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* 602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* // do something with the current set of bytes 612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* 
out.write(byteBuffer, 0, bytesWritten); 622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* 632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* // update the no. of characters compressed 642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* totalCharsCompressed += unicharsRead[0]; 652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* 662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* // update the no. of bytes written 672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* totalBytesWritten += bytesWritten; 682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* 69bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin* } while(totalCharsCompressed < len); 702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* 712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* myCompressor.reset(); // reuse compressor 722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* </PRE> 732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* 742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* @see UnicodeDecompressor 752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* 762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* @author Stephen F. Booth 77836e6b40a94ec3fb7545a76cb072960442b7eee9Neil Fuller* @hide Only a subset of ICU is exposed in Android 782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*/ 792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller/* 812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* 822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* COMPRESSION STRATEGY 832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* 842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* Single Byte Mode 852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* 862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* There are three relevant cases. 
*  If the character is in the current window or is an ASCII character that
*  is not a single-byte mode tag (U+0000, U+0009, U+000A, U+000D,
*  U+0020 - U+007F), the character is placed directly in the stream as a
*  single byte.
*
*  1. Current character is in defined, inactive window.
*  2. Current character is in undefined window.
*  3. Current character is uncompressible Unicode (U+3400 - U+DFFF).
*
*  1. Current character is in defined, inactive window
*    A. Look ahead two characters
*    B. If both following characters in same window as current character,
*       switch to defined window
*    C. If only next character is in same window as current character,
*       quote defined window
*    D. If neither of following characters is in same window as current,
*       quote defined window
*
*  2. Current character is in undefined window
*    A. Look ahead two characters
*    B.
If both following characters in same window as current character, 1072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* define new window 1082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* C. If only next character in same window as current character, 1092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* switch to Unicode mode 1102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* NOTE: This costs us one extra byte. However, 1112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* since we have a limited number of windows to work with, it is 1122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* assumed the cost will pay off later in savings from a window with 1132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* more characters in it. 1142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* D. If neither of following characters in same window as current, 1152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* switch to Unicode mode. Alternative to above: just quote 1162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* Unicode (same byte cost) 1172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* 1182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* 3. Current character is uncompressible Unicode (U+3400 - U+DFFF) 1192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* A. Look ahead one character 1202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* B. If next character in non-compressible region, switch to 1212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* Unicode mode 1222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* C. 
If next character not in non-compressible region, quote Unicode 1232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* 1242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* 1252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* The following chart illustrates the bytes required for encoding characters 1262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* in each possible way 1272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* 1282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* 1292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* SINGLE BYTE MODE 1302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* Characters in a row with same index 1312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* tag encountered 1 2 3 4 1322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* --------------------------------------------------------------- 1332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* none (in current window) 1 2 3 4 1342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* 1352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* quote Unicode 3 6 9 12 1362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* 1372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* window not switch to Unicode 3 5 7 9 byte 1382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* defined define window 3 4 5 6 cost 1392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* 1402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* window switch to window 2 3 4 5 1412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* defined quote window 2 4 6 8 1422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* 1432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* Unicode Mode 1442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* 1452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* There are two relevant cases. 
1462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* If the character is in the non-compressible region 1472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* (U+3400 - U+DFFF), the character is simply written to the 1482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* stream as a pair of bytes. 1492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* 1502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* 1. Current character is in defined, inactive window. 1512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* 2. Current character is in undefined window. 1522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* 1532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* 1.Current character is in defined, inactive window 1542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* A. Look ahead one character 1552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* B. If next character has same index as current character, 1562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* switch to defined window (and switch to single-byte mode) 1572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* C. If not, just put bytes in stream 1582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* 1592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* 1602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* 2. Current character is in undefined window 1612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* A. Look ahead two characters 1622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* B. If both in same window as current character, define window 1632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* (and switch to single-byte mode) 1642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* C. If only next character in same window, just put bytes in stream 1652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* NOTE: This costs us one extra byte. 
However, 1662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* since we have a limited number of windows to work with, it is 1672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* assumed the cost will pay off later in savings from a window with 1682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* more characters in it. 1692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* D. If neither in same window, put bytes in stream 1702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* 1712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* 1722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* The following chart illustrates the bytes required for encoding characters 1732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* in each possible way 1742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* 1752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* 1762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* UNICODE MODE 1772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* Characters in a row with same index 1782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* tag encountered 1 2 3 4 1792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* --------------------------------------------------------------- 1802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* none 2 4 6 8 1812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* 1822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* quote Unicode 3 6 9 12 1832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* 1842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* window not define window 3 4 5 6 byte 1852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* defined cost 1862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* window switch to window 2 3 4 5 1872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* defined 1882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*/ 1892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerpublic final class UnicodeCompressor implements SCSU 
1902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller{ 1912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller //========================== 1922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Class variables 1932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller //========================== 1942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 1952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** For quick identification of a byte as a single-byte mode tag */ 1962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static boolean [] sSingleTagTable = { 1972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // table generated by CompressionTableGenerator 1982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller false, true, true, true, true, true, true, true, true, false, 1992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller false, true, true, false, true, true, true, true, true, true, 2002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller true, true, true, true, true, true, true, true, true, true, 2012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller true, true, false, false, false, false, false, false,false, 2022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller false, false, false, false, false, false, false, false, false, 2032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller false, false, false, false, false, false, false, false, false, 2042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller false, false, false, false, false, false, false, false, false, 2052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller false, false, false, false, false, false, false, false, false, 2062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller false, false, false, false, false, false, false, false, false, 2072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller false, false, false, false, false, false, false, false, false, 2082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller false, false, false, false, false, false, false, false, false, 
2092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller false, false, false, false, false, false, false, false, false, 2102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller false, false, false, false, false, false, false, false, false, 2112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller false, false, false, false, false, false, false, false, false, 2122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller false, false, false, false, false, false, false, false, false, 2132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller false, false, false, false, false, false, false, false, false, 2142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller false, false, false, false, false, false, false, false, false, 2152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller false, false, false, false, false, false, false, false, false, 2162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller false, false, false, false, false, false, false, false, false, 2172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller false, false, false, false, false, false, false, false, false, 2182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller false, false, false, false, false, false, false, false, false, 2192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller false, false, false, false, false, false, false, false, false, 2202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller false, false, false, false, false, false, false, false, false, 2212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller false, false, false, false, false, false, false, false, false, 2222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller false, false, false, false, false, false, false, false, false, 2232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller false, false, false, false, false, false, false, false, false, 2242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller false, false, false, false, false, false, false, false, false, 2252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller false, false, false, false, false, false, false, false, 
false, 2262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller false 2272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller }; 2282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** For quick identification of a byte as a unicode mode tag */ 2302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static boolean [] sUnicodeTagTable = { 2312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // table generated by CompressionTableGenerator 2322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller false, false, false, false, false, false, false, false, false, 2332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller false, false, false, false, false, false, false, false, false, 2342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller false, false, false, false, false, false, false, false, false, 2352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller false, false, false, false, false, false, false, false, false, 2362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller false, false, false, false, false, false, false, false, false, 2372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller false, false, false, false, false, false, false, false, false, 2382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller false, false, false, false, false, false, false, false, false, 2392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller false, false, false, false, false, false, false, false, false, 2402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller false, false, false, false, false, false, false, false, false, 2412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller false, false, false, false, false, false, false, false, false, 2422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller false, false, false, false, false, false, false, false, false, 2432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller false, false, false, false, false, false, false, false, false, 2442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller false, false, false, 
false, false, false, false, false, false, 2452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller false, false, false, false, false, false, false, false, false, 2462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller false, false, false, false, false, false, false, false, false, 2472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller false, false, false, false, false, false, false, false, false, 2482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller false, false, false, false, false, false, false, false, false, 2492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller false, false, false, false, false, false, false, false, false, 2502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller false, false, false, false, false, false, false, false, false, 2512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller false, false, false, false, false, false, false, false, false, 2522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller false, false, false, false, false, false, false, false, false, 2532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller false, false, false, false, false, false, false, false, false, 2542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller false, false, false, false, false, false, false, false, false, 2552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller false, false, false, false, false, false, false, false, false, 2562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller false, false, false, false, false, false, false, false, true, 2572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller true, true, true, true, true, true, true, true, true, true, 2582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller true, true, true, true, true, true, true, true, false, false, 2592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller false, false, false, false, false, false, false, false, false, 2602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller false, false 2612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller }; 2622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 
2632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller //========================== 2642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Instance variables 2652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller //========================== 2662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** Alias to current dynamic window */ 2682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int fCurrentWindow = 0; 2692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** Dynamic compression window offsets */ 2712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int [] fOffsets = new int [ NUMWINDOWS ]; 2722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** Current compression mode */ 2742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int fMode = SINGLEBYTEMODE; 2752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** Keeps count of times character indices are encountered */ 2772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int [] fIndexCount = new int [ MAXINDEX + 1 ]; 2782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** The time stamps indicate when a window was last defined */ 2802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int [] fTimeStamps = new int [ NUMWINDOWS ]; 2812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** The current time stamp */ 2832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int fTimeStamp = 0; 2842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 2872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Create a 
UnicodeCompressor. 2882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Sets all windows to their default values. 2892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @see #reset 2902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 2912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public UnicodeCompressor() 2922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller { 2932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller reset(); // initialize to defaults 2942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 2972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Compress a string into a byte array. 2982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param buffer The string to compress. 2992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return A byte array containing the compressed characters. 3002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @see #compress(char [], int, int) 3012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 3022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static byte [] compress(String buffer) 3032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller { 3042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return compress(buffer.toCharArray(), 0, buffer.length()); 3052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 3082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Compress a Unicode character array into a byte array. 3092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param buffer The character buffer to compress. 3102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param start The start of the character run to compress. 3112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param limit The limit of the character run to compress. 
3122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return A byte array containing the compressed characters. 3132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @see #compress(String) 3142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 3152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static byte [] compress(char [] buffer, 3162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int start, 3172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int limit) 3182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller { 3192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller UnicodeCompressor comp = new UnicodeCompressor(); 3202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // use a buffer that we know will never overflow 3222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // in the worst case, each character will take 3 bytes 3232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // to encode: UQU, hibyte, lobyte. In this case, the 3242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // compressed data will look like: SCU, UQU, hibyte, lobyte, ... 
3252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // buffer must be at least 4 bytes in size 3262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int len = Math.max(4, 3 * (limit - start) + 1); 3272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller byte [] temp = new byte [len]; 3282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int byteCount = comp.compress(buffer, start, limit, null, 3302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller temp, 0, len); 3312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller byte [] result = new byte [byteCount]; 3332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller System.arraycopy(temp, 0, result, 0, byteCount); 3342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return result; 3352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 3382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Compress a Unicode character array into a byte array. 3392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 3402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * This function will only consume input that can be completely 3412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * output. 3422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 3432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param charBuffer The character buffer to compress. 3442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param charBufferStart The start of the character run to compress. 3452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param charBufferLimit The limit of the character run to compress. 3462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param charsRead A one-element array. 
If not null, on return 3472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * the number of characters read from charBuffer. 3482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param byteBuffer A buffer to receive the compressed data. This 3492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * buffer must be at minimum four bytes in size. 3502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param byteBufferStart The starting offset to which to write 3512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * compressed data. 3522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param byteBufferLimit The limiting offset for writing compressed data. 3532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return The number of bytes written to byteBuffer. 3542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 3552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public int compress(char [] charBuffer, 3562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int charBufferStart, 3572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int charBufferLimit, 3582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int [] charsRead, 3592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller byte [] byteBuffer, 3602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int byteBufferStart, 3612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int byteBufferLimit) 3622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller { 3632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // the current position in the target byte buffer 3642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int bytePos = byteBufferStart; 3652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // the current position in the source unicode character buffer 3672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int ucPos = charBufferStart; 3682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 
// the current unicode character from the source buffer 3702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int curUC = INVALIDCHAR; 3712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // the index for the current character 3732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int curIndex = -1; 3742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // look ahead 3762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int nextUC = INVALIDCHAR; 3772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int forwardUC = INVALIDCHAR; 3782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // temporary for window searching 3802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int whichWindow = 0; 3812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // high and low bytes of the current unicode character 3832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int hiByte = 0; 3842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int loByte = 0; 3852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // byteBuffer must be at least 4 bytes in size 3882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(byteBuffer.length < 4 || (byteBufferLimit - byteBufferStart) < 4) 3892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new IllegalArgumentException("byteBuffer.length < 4"); 3902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller mainLoop: 3922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while(ucPos < charBufferLimit && bytePos < byteBufferLimit) { 3932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller switch(fMode) { 3942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // main 
single byte mode compression loop 3952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case SINGLEBYTEMODE: 3962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller singleByteModeLoop: 3972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while(ucPos < charBufferLimit && bytePos < byteBufferLimit) { 3982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // get current char 3992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller curUC = charBuffer[ucPos++]; 4002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // get next char 4022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(ucPos < charBufferLimit) 4032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller nextUC = charBuffer[ucPos]; 4042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller else 4052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller nextUC = INVALIDCHAR; 4062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // chars less than 0x0080 (excluding tags) go straight 4082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // in stream 4092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(curUC < 0x0080) { 4102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller loByte = curUC & 0xFF; 4112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // we need to check and make sure we don't 4132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // accidentally write a single byte mode tag to 4142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // the stream unless it's quoted 4152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(sSingleTagTable[loByte]) { 4162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // make sure there is enough room to 4172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // write both bytes if not, rewind the 4182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // source stream and break out 
4192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if( (bytePos + 1) >= byteBufferLimit) 4202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller { --ucPos; break mainLoop; } 4212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // since we know the byte is less than 0x80, SQUOTE0 4232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // will use static window 0, or ASCII 4242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller byteBuffer[bytePos++] = (byte) SQUOTE0; 4252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller byteBuffer[bytePos++] = (byte) loByte; 4282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // if the char belongs to current window, convert it 4312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // to a byte by adding the generic compression offset 4322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // and subtracting the window's offset 4332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller else if(inDynamicWindow(curUC, fCurrentWindow) ) { 4342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller byteBuffer[bytePos++] = (byte) 4352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller (curUC - fOffsets[ fCurrentWindow ] 4362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller + COMPRESSIONOFFSET); 4372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // if char is not in compressible range, either switch to or 4402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // quote from unicode 4412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller else if( ! 
isCompressible(curUC) ) { 4422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // only check next character if it is valid 4432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(nextUC != INVALIDCHAR && isCompressible(nextUC)) { 4442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // make sure there is enough room to 4452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // write all three bytes if not, 4462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // rewind the source stream and break 4472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // out 4482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if( (bytePos + 2) >= byteBufferLimit) 4492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller { --ucPos; break mainLoop; } 4502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller byteBuffer[bytePos++] = (byte) SQUOTEU; 4522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller byteBuffer[bytePos++] = (byte) (curUC >>> 8); 4532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller byteBuffer[bytePos++] = (byte) (curUC & 0xFF); 4542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller else { 4562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // make sure there is enough room to 4572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // write all four bytes if not, rewind 4582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // the source stream and break out 4592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if((bytePos + 3) >= byteBufferLimit) 4602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller { --ucPos; break mainLoop; } 4612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller byteBuffer[bytePos++] = (byte) SCHANGEU; 4632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller hiByte = curUC >>> 8; 
4652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller loByte = curUC & 0xFF; 4662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(sUnicodeTagTable[hiByte]) 4682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // add quote Unicode tag 4692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller byteBuffer[bytePos++] = (byte) UQUOTEU; 4702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller byteBuffer[bytePos++] = (byte) hiByte; 4722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller byteBuffer[bytePos++] = (byte) loByte; 4732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fMode = UNICODEMODE; 4752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break singleByteModeLoop; 4762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // if the char is in a currently defined dynamic 4802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // window, figure out which one, and either switch to 4812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // it or quote from it 4822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller else if((whichWindow = findDynamicWindow(curUC)) 4832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller != INVALIDWINDOW ) { 4842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // look ahead 4852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if( (ucPos + 1) < charBufferLimit ) 4862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller forwardUC = charBuffer[ucPos + 1]; 4872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller else 4882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller forwardUC = INVALIDCHAR; 4892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // all three 
chars in same window, switch to that 4912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // window inDynamicWindow will return false for 4922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // INVALIDCHAR 4932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(inDynamicWindow(nextUC, whichWindow) 4942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller && inDynamicWindow(forwardUC, whichWindow)) { 4952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // make sure there is enough room to 4962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // write both bytes if not, rewind the 4972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // source stream and break out 4982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if( (bytePos + 1) >= byteBufferLimit) 4992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller { --ucPos; break mainLoop; } 5002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 5012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller byteBuffer[bytePos++] = (byte)(SCHANGE0 + whichWindow); 5022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller byteBuffer[bytePos++] = (byte) 5032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller (curUC - fOffsets[whichWindow] 5042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller + COMPRESSIONOFFSET); 5052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fTimeStamps [ whichWindow ] = ++fTimeStamp; 5062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fCurrentWindow = whichWindow; 5072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 5092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // either only next char or neither in same 5102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // window, so quote 5112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller else { 5122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // make sure there is enough room to 5132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // write both bytes if not, rewind the 
5142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // source stream and break out 5152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if((bytePos + 1) >= byteBufferLimit) 5162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller { --ucPos; break mainLoop; } 5172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 5182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller byteBuffer[bytePos++] = (byte) (SQUOTE0 + whichWindow); 5192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller byteBuffer[bytePos++] = (byte) 5202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller (curUC - fOffsets[whichWindow] 5212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller + COMPRESSIONOFFSET); 5222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 5252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // if a static window is defined, and the following 5262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // character is not in that static window, quote from 5272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // the static window Note: to quote from a static 5282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // window, don't add 0x80 5292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller else if((whichWindow = findStaticWindow(curUC)) 5302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller != INVALIDWINDOW 5312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller && ! 
inStaticWindow(nextUC, whichWindow) ) { 5322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // make sure there is enough room to write both 5332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // bytes if not, rewind the source stream and 5342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // break out 5352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if((bytePos + 1) >= byteBufferLimit) 5362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller { --ucPos; break mainLoop; } 5372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 5382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller byteBuffer[bytePos++] = (byte) (SQUOTE0 + whichWindow); 5392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller byteBuffer[bytePos++] = (byte) 5402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller (curUC - sOffsets[whichWindow]); 5412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 5432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // if a window is not defined, decide if we want to 5442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // define a new one or switch to unicode mode 5452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller else { 5462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // determine index for current char (char is compressible) 5472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller curIndex = makeIndex(curUC); 5482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fIndexCount[curIndex]++; 5492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 5502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // look ahead 5512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if((ucPos + 1) < charBufferLimit) 5522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller forwardUC = charBuffer[ucPos + 1]; 5532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller else 5542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller forwardUC = INVALIDCHAR; 5552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 
5562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // if we have encountered this index at least once 5572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // before, define a new window 5582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // OR 5592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // three chars in a row with same index, define a 5602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // new window (makeIndex will return RESERVEDINDEX 5612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // for INVALIDCHAR) 5622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if((fIndexCount[curIndex] > 1) || 5632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller (curIndex == makeIndex(nextUC) 5642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller && curIndex == makeIndex(forwardUC))) { 5652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // make sure there is enough room to write all 5662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // three bytes if not, rewind the source 5672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // stream and break out 5682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if( (bytePos + 2) >= byteBufferLimit) 5692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller { --ucPos; break mainLoop; } 5702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 5712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // get least recently defined window 5722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller whichWindow = getLRDefinedWindow(); 5732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 5742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller byteBuffer[bytePos++] = (byte)(SDEFINE0 + whichWindow); 5752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller byteBuffer[bytePos++] = (byte) curIndex; 5762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller byteBuffer[bytePos++] = (byte) 5772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller (curUC - sOffsetTable[curIndex] 5782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller + 
COMPRESSIONOFFSET); 5792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 5802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fOffsets[whichWindow] = sOffsetTable[curIndex]; 5812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fCurrentWindow = whichWindow; 5822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fTimeStamps [whichWindow] = ++fTimeStamp; 5832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 5852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // only two chars in a row with same index, so 5862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // switch to unicode mode (makeIndex will return 5872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // RESERVEDINDEX for INVALIDCHAR) 5882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // OR 5892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // three chars have different indices, so switch 5902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // to unicode mode 5912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller else { 5922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // make sure there is enough room to write all 5932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // four bytes if not, rewind the source stream 5942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // and break out 5952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if((bytePos + 3) >= byteBufferLimit) 5962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller { --ucPos; break mainLoop; } 5972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 5982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller byteBuffer[bytePos++] = (byte) SCHANGEU; 5992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 6002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller hiByte = curUC >>> 8; 6012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller loByte = curUC & 0xFF; 6022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 6032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 
if(sUnicodeTagTable[hiByte]) 6042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // add quote Unicode tag 6052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller byteBuffer[bytePos++] = (byte) UQUOTEU; 6062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 6072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller byteBuffer[bytePos++] = (byte) hiByte; 6082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller byteBuffer[bytePos++] = (byte) loByte; 6092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 6102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fMode = UNICODEMODE; 6112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break singleByteModeLoop; 6122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 6162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 6172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case UNICODEMODE: 6182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // main unicode mode compression loop 6192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller unicodeModeLoop: 6202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while(ucPos < charBufferLimit && bytePos < byteBufferLimit) { 6212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // get current char 6222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller curUC = charBuffer[ucPos++]; 6232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 6242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // get next char 6252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if( ucPos < charBufferLimit ) 6262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller nextUC = charBuffer[ucPos]; 6272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller else 6282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller nextUC = INVALIDCHAR; 6292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 6302ae130017183d2f66d55bf0ca51f8da3294644fdNeil 
Fuller // if we have two uncompressible chars in a row, 6312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // put the current char's bytes in the stream 6322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if( ! isCompressible(curUC) 6332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller || (nextUC != INVALIDCHAR && ! isCompressible(nextUC))) { 6342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // make sure there is enough room to write all three bytes 6352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // if not, rewind the source stream and break out 6362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if( (bytePos + 2) >= byteBufferLimit) 6372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller { --ucPos; break mainLoop; } 6382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 6392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller hiByte = curUC >>> 8; 6402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller loByte = curUC & 0xFF; 6412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 6422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(sUnicodeTagTable[ hiByte ]) 6432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // add quote Unicode tag 6442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller byteBuffer[bytePos++] = (byte) UQUOTEU; 6452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 6462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller byteBuffer[bytePos++] = (byte) hiByte; 6472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller byteBuffer[bytePos++] = (byte) loByte; 6482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 6502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // bytes less than 0x80 can go straight in the stream, 6512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // but in single-byte mode 6522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller else if(curUC < 0x0080) { 6532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller loByte = curUC & 0xFF; 
6542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 6552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // if two chars in a row below 0x80 and the 6562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // current char is not a single-byte mode tag, 6572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // switch to single-byte mode 6582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(nextUC != INVALIDCHAR 6592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller && nextUC < 0x0080 && ! sSingleTagTable[ loByte ] ) { 6602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // make sure there is enough room to 6612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // write both bytes if not, rewind the 6622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // source stream and break out 6632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if( (bytePos + 1) >= byteBufferLimit) 6642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller { --ucPos; break mainLoop; } 6652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 6662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // use the last-active window 6672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller whichWindow = fCurrentWindow; 6682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller byteBuffer[bytePos++] = (byte)(UCHANGE0 + whichWindow); 6692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller byteBuffer[bytePos++] = (byte) loByte; 6702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 6712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller //fCurrentWindow = 0; 6722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fTimeStamps [whichWindow] = ++fTimeStamp; 6732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fMode = SINGLEBYTEMODE; 6742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break unicodeModeLoop; 6752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 6772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // otherwise, just write the bytes to 
the stream 6782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // (this will cover the case of only 1 char less than 0x80 6792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // and single-byte mode tags) 6802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller else { 6812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // make sure there is enough room to 6822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // write both bytes if not, rewind the 6832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // source stream and break out 6842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if((bytePos + 1) >= byteBufferLimit) 6852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller { --ucPos; break mainLoop; } 6862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 6872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // since the character is less than 0x80, the 6882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // high byte is always 0x00 - no need for 6892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // (curUC >>> 8) 6902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller byteBuffer[bytePos++] = (byte) 0x00; 6912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller byteBuffer[bytePos++] = (byte) loByte; 6922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 6952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // figure out if the current char is in a defined window 6962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller else if((whichWindow = findDynamicWindow(curUC)) 6972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller != INVALIDWINDOW ) { 6982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // if two chars in a row in the same window, 6992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // switch to that window and go to single-byte mode 7002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // inDynamicWindow will return false for 
INVALIDCHAR 7012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(inDynamicWindow(nextUC, whichWindow)) { 7022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // make sure there is enough room to 7032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // write both bytes if not, rewind the 7042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // source stream and break out 7052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if((bytePos + 1) >= byteBufferLimit) 7062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller { --ucPos; break mainLoop; } 7072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 7082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller byteBuffer[bytePos++] = (byte)(UCHANGE0 + whichWindow); 7092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller byteBuffer[bytePos++] = (byte) 7102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller (curUC - fOffsets[whichWindow] 7112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller + COMPRESSIONOFFSET); 7122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 7132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fTimeStamps [ whichWindow ] = ++fTimeStamp; 7142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fCurrentWindow = whichWindow; 7152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fMode = SINGLEBYTEMODE; 7162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break unicodeModeLoop; 7172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 7192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // otherwise, just quote the unicode for the char 7202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller else { 7212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // make sure there is enough room to 7222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // write all three bytes if not, 7232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // rewind the source stream and break 7242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // out 
7252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if((bytePos + 2) >= byteBufferLimit) 7262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller { --ucPos; break mainLoop; } 7272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 7282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller hiByte = curUC >>> 8; 7292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller loByte = curUC & 0xFF; 7302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 7312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(sUnicodeTagTable[ hiByte ]) 7322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // add quote Unicode tag 7332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller byteBuffer[bytePos++] = (byte) UQUOTEU; 7342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 7352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller byteBuffer[bytePos++] = (byte) hiByte; 7362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller byteBuffer[bytePos++] = (byte) loByte; 7372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 7402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // char is not in a defined window 7412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller else { 7422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // determine index for current char (char is compressible) 7432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller curIndex = makeIndex(curUC); 7442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fIndexCount[curIndex]++; 7452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 7462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // look ahead 7472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if( (ucPos + 1) < charBufferLimit ) 7482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller forwardUC = charBuffer[ucPos + 1]; 7492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller else 7502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller forwardUC = INVALIDCHAR; 
7512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 7522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // if we have encountered this index at least once 7532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // before, define a new window for it that hasn't 7542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // previously been redefined 7552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // OR 7562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // if three chars in a row with the same index, 7572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // define a new window (makeIndex will return 7582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // RESERVEDINDEX for INVALIDCHAR) 7592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if((fIndexCount[curIndex] > 1) || 7602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller (curIndex == makeIndex(nextUC) 7612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller && curIndex == makeIndex(forwardUC))) { 7622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // make sure there is enough room to 7632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // write all three bytes if not, 7642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // rewind the source stream and break 7652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // out 7662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if((bytePos + 2) >= byteBufferLimit) 7672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller { --ucPos; break mainLoop; } 7682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 7692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // get least recently defined window 7702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller whichWindow = getLRDefinedWindow(); 7712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 7722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller byteBuffer[bytePos++] = (byte)(UDEFINE0 + whichWindow); 7732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller byteBuffer[bytePos++] = (byte) curIndex; 
7742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller byteBuffer[bytePos++] = (byte) 7752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller (curUC - sOffsetTable[curIndex] 7762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller + COMPRESSIONOFFSET); 7772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 7782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fOffsets[whichWindow] = sOffsetTable[curIndex]; 7792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fCurrentWindow = whichWindow; 7802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fTimeStamps [whichWindow] = ++fTimeStamp; 7812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fMode = SINGLEBYTEMODE; 7822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break unicodeModeLoop; 7832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 7852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // otherwise just quote the unicode, and save our 7862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // windows for longer runs 7872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller else { 7882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // make sure there is enough room to 7892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // write all three bytes if not, 7902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // rewind the source stream and break 7912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // out 7922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if((bytePos + 2) >= byteBufferLimit) 7932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller { --ucPos; break mainLoop; } 7942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 7952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller hiByte = curUC >>> 8; 7962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller loByte = curUC & 0xFF; 7972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 7982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(sUnicodeTagTable[ hiByte ]) 
7992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // add quote Unicode tag 8002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller byteBuffer[bytePos++] = (byte) UQUOTEU; 8012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 8022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller byteBuffer[bytePos++] = (byte) hiByte; 8032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller byteBuffer[bytePos++] = (byte) loByte; 8042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } // end switch 8082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 8102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // fill in output parameter 8112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(charsRead != null) 8122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller charsRead [0] = (ucPos - charBufferStart); 8132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 8142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // return # of bytes written 8152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return (bytePos - byteBufferStart); 8162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 8182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 8192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Reset the compressor to its initial state. 
8202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 8212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public void reset() 8222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller { 8232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int i; 8242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 8252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // reset dynamic windows 8262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fOffsets[0] = 0x0080; // Latin-1 8272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fOffsets[1] = 0x00C0; // Latin-1 Supplement + Latin Extended-A 8282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fOffsets[2] = 0x0400; // Cyrillic 8292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fOffsets[3] = 0x0600; // Arabic 8302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fOffsets[4] = 0x0900; // Devanagari 8312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fOffsets[5] = 0x3040; // Hiragana 8322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fOffsets[6] = 0x30A0; // Katakana 8332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fOffsets[7] = 0xFF00; // Fullwidth ASCII 8342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 8352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 8362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // reset time stamps 8372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for(i = 0; i < NUMWINDOWS; i++) { 8382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fTimeStamps[i] = 0; 8392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 8412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // reset count of seen indices 8422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for(i = 0; i <= MAXINDEX; i++ ) { 8432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fIndexCount[i] = 0; 8442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 
8462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fTimeStamp = 0; // Reset current time stamp 8472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fCurrentWindow = 0; // Make current window Latin-1 8482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fMode = SINGLEBYTEMODE; // Always start in single-byte mode 8492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 8512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller //========================== 8522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Determine the index for a character 8532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller //========================== 8542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 8552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 8562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Create the index value for a character. 8572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * For more information on this function, refer to table X-3 8582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <A HREF="http://www.unicode.org/unicode/reports/tr6">UTR6</A>. 8592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param c The character in question. 
8602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return An index for c 8612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 8622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static int makeIndex(int c) 8632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller { 8642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // check the predefined indices 8652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(c >= 0x00C0 && c < 0x0140) 8662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return LATININDEX; 8672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller else if(c >= 0x0250 && c < 0x02D0) 8682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return IPAEXTENSIONINDEX; 8692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller else if(c >= 0x0370 && c < 0x03F0) 8702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return GREEKINDEX; 8712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller else if(c >= 0x0530 && c < 0x0590) 8722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return ARMENIANINDEX; 8732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller else if(c >= 0x3040 && c < 0x30A0) 8742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return HIRAGANAINDEX; 8752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller else if(c >= 0x30A0 && c < 0x3120) 8762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return KATAKANAINDEX; 8772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller else if(c >= 0xFF60 && c < 0xFF9F) 8782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return HALFWIDTHKATAKANAINDEX; 8792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 8802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // calculate index 8812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller else if(c >= 0x0080 && c < 0x3400) 8822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return (c / 0x80) & 0xFF; 8832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller else if(c >= 0xE000 && c <= 0xFFFF) 8842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return 
((c - 0xAC00) / 0x80) & 0xFF; 8852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 8862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // should never happen 8872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller else { 8882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return RESERVEDINDEX; 8892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 8922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller //========================== 8932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Check if a given character fits in a window 8942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller //========================== 8952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 8962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 8972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Determine if a character is in a dynamic window. 8982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param c The character to test 8992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param whichWindow The dynamic window the test 9002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return true if <TT>c</TT> will fit in <TT>whichWindow</TT>, 9012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * false otherwise. 
9022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 9032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private boolean inDynamicWindow(int c, 9042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int whichWindow) 9052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller { 9062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return (c >= fOffsets[whichWindow] 9072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller && c < (fOffsets[whichWindow] + 0x80)); 9082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 9102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 9112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Determine if a character is in a static window. 9122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param c The character to test 9132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param whichWindow The static window the test 9142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return true if <TT>c</TT> will fit in <TT>whichWindow</TT>, 9152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * false otherwise. 
9162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 9172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static boolean inStaticWindow(int c, 9182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int whichWindow) 9192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller { 9202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return (c >= sOffsets[whichWindow] 9212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller && c < (sOffsets[whichWindow] + 0x80)); 9222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 9242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller //========================== 9252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Check if a given character is compressible 9262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller //========================== 9272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 9282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 9292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Determine if a character is compressible. 9302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param c The character to test. 9312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return true if the <TT>c</TT> is compressible, false otherwise. 
9322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 9332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static boolean isCompressible(int c) 9342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller { 9352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return (c < 0x3400 || c >= 0xE000); 9362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 9382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller //========================== 9392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Check if a window is defined for a given character 9402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller //========================== 9412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 9422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 9432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Determine if a dynamic window for a certain character is defined 9442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param c The character in question 9452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return The dynamic window containing <TT>c</TT>, or 9462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * INVALIDWINDOW if not defined. 
9472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 9482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int findDynamicWindow(int c) 9492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller { 9502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // supposedly faster to count down 9512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller //for(int i = 0; i < NUMWINDOWS; i++) { 9522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for(int i = NUMWINDOWS - 1; i >= 0; --i) { 9532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(inDynamicWindow(c, i)) { 9542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ++fTimeStamps[i]; 9552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return i; 9562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 9592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return INVALIDWINDOW; 9602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 9622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 9632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Determine if a static window for a certain character is defined 9642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param c The character in question 9652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return The static window containing <TT>c</TT>, or 9662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * INVALIDWINDOW if not defined. 
9672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 9682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static int findStaticWindow(int c) 9692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller { 9702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // supposedly faster to count down 9712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller //for(int i = 0; i < NUMSTATICWINDOWS; i++) { 9722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for(int i = NUMSTATICWINDOWS - 1; i >= 0; --i) { 9732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if(inStaticWindow(c, i)) { 9742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return i; 9752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 9782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return INVALIDWINDOW; 9792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 9812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller //========================== 9822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Find the least-recently used window 9832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller //========================== 9842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 9852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** Find the least-recently defined window */ 9862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private int getLRDefinedWindow() 9872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller { 9882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int leastRU = Integer.MAX_VALUE; 9892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int whichWindow = INVALIDWINDOW; 9902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 9912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // find least recently used window 9922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // supposedly faster to count down 
9932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller //for( int i = 0; i < NUMWINDOWS; i++ ) { 9942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for(int i = NUMWINDOWS - 1; i >= 0; --i ) { 9952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if( fTimeStamps[i] < leastRU ) { 9962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller leastRU = fTimeStamps[i]; 9972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller whichWindow = i; 9982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 10012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return whichWindow; 10022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 10042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller} 1005