12ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller/* GENERATED SOURCE. DO NOT MODIFY. */
2f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert// © 2016 and later: Unicode, Inc. and others.
3f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert// License & terms of use: http://www.unicode.org/copyright.html#License
42ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller/*
52ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *******************************************************************************
6bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin * Copyright (C) 1996-2016, International Business Machines Corporation and    *
72ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * others. All Rights Reserved.                                                *
82ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *******************************************************************************
92ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */
102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerpackage android.icu.text;
112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller/**
132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* A compression engine implementing the Standard Compression Scheme
142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* for Unicode (SCSU) as outlined in <A
152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* HREF="http://www.unicode.org/unicode/reports/tr6">Unicode Technical
162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* Report #6</A>.
172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*
182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* <P>The SCSU works by using dynamically positioned <EM>windows</EM>
192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* consisting of 128 consecutive characters in Unicode.  During compression,
202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* characters within a window are encoded in the compressed stream as the bytes
* <TT>0x80 - 0xFF</TT>. The SCSU provides transparency for the characters
222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* (bytes) between <TT>U+0000 - U+00FF</TT>.  The SCSU approximates the
232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* storage size of traditional character sets, for example 1 byte per
242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* character for ASCII or Latin-1 text, and 2 bytes per character for CJK
252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* ideographs.</P>
262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*
272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* <P><STRONG>USAGE</STRONG></P>
282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*
292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* <P>The static methods on <TT>UnicodeCompressor</TT> may be used in a
302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* straightforward manner to compress simple strings:</P>
312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*
322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* <PRE>
332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*  String s = ... ; // get string from somewhere
342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*  byte [] compressed = UnicodeCompressor.compress(s);
352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* </PRE>
362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*
372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* <P>The static methods have a fairly large memory footprint.
382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* For finer-grained control over memory usage,
392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* <TT>UnicodeCompressor</TT> offers more powerful APIs allowing
402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* iterative compression:</P>
412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*
422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* <PRE>
432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*  // Compress an array "chars" of length "len" using a buffer of 512 bytes
442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*  // to the OutputStream "out"
452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*
462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*  UnicodeCompressor myCompressor         = new UnicodeCompressor();
472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*  final static int  BUFSIZE              = 512;
482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*  byte []           byteBuffer           = new byte [ BUFSIZE ];
492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*  int               bytesWritten         = 0;
502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*  int []            unicharsRead         = new int [1];
512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*  int               totalCharsCompressed = 0;
522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*  int               totalBytesWritten    = 0;
532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*
542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*  do {
552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*    // do the compression
562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*    bytesWritten = myCompressor.compress(chars, totalCharsCompressed,
572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*                                         len, unicharsRead,
582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*                                         byteBuffer, 0, BUFSIZE);
592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*
602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*    // do something with the current set of bytes
612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*    out.write(byteBuffer, 0, bytesWritten);
622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*
632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*    // update the no. of characters compressed
642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*    totalCharsCompressed += unicharsRead[0];
652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*
662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*    // update the no. of bytes written
672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*    totalBytesWritten += bytesWritten;
682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*
69bfab1e7fec36dff93fb980c546ad64a565faf9fcPaul Duffin*  } while(totalCharsCompressed &lt; len);
702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*
712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*  myCompressor.reset(); // reuse compressor
722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* </PRE>
732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*
742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* @see UnicodeDecompressor
752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*
762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* @author Stephen F. Booth
77836e6b40a94ec3fb7545a76cb072960442b7eee9Neil Fuller* @hide Only a subset of ICU is exposed in Android
782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*/
792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller/*
812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*
822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* COMPRESSION STRATEGY
832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*
842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* Single Byte Mode
852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*
862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* There are three relevant cases.
872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* If the character is in the current window or is Latin-1 (U+0000,
882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* U+0009, U+000A, U+000D, U+0020 - U+007F), the character is placed
892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* directly in the stream as a single byte.
902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*
912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*  1. Current character is in defined, inactive window.
922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*  2. Current character is in undefined window.
932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*  3. Current character is uncompressible Unicode (U+3400 - U+DFFF).
942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*
952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*  1. Current character is in defined, inactive window
962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*    A. Look ahead two characters
972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*    B. If both following characters in same window as current character,
982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*       switch to defined window
992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*    C. If only next character is in same window as current character,
1002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*       quote defined window
1012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*    D. If neither of following characters is in same window as current,
1022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*       quote defined window
1032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*
1042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*  2. Current character is in undefined window
1052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*    A. Look ahead two characters
1062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*    B. If both following characters in same window as current character,
1072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*       define new window
1082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*    C. If only next character in same window as current character,
1092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*       switch to Unicode mode
1102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*       NOTE: This costs us one extra byte.  However,
1112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*        since we have a limited number of windows to work with, it is
1122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*        assumed the cost will pay off later in savings from a window with
1132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*        more characters in it.
1142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*    D. If neither of following characters in same window as current,
1152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*       switch to Unicode mode.  Alternative to above: just quote
1162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*       Unicode (same byte cost)
1172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*
1182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*  3. Current character is uncompressible Unicode (U+3400 - U+DFFF)
1192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*    A. Look ahead one character
1202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*    B. If next character in non-compressible region, switch to
1212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*       Unicode mode
1222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*    C. If next character not in non-compressible region, quote Unicode
1232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*
1242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*
1252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* The following chart illustrates the bytes required for encoding characters
1262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* in each possible way
1272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*
1282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*
1292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*                                   SINGLE BYTE MODE
1302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*                                       Characters in a row with same index
1312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*               tag encountered             1       2       3       4
1322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*               ---------------------------------------------------------------
1332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*               none (in current window)    1       2       3       4
1342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*
1352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*               quote Unicode               3       6       9       12
1362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*
1372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*   window not  switch to Unicode           3       5       7       9     byte
1382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*   defined     define window               3       4       5       6     cost
1392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*
1402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*   window      switch to window            2       3       4       5
1412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*   defined     quote window                2       4       6       8
1422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*
1432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*  Unicode Mode
1442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*
1452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* There are two relevant cases.
1462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* If the character is in the non-compressible region
1472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* (U+3400 - U+DFFF), the character is simply written to the
1482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* stream as a pair of bytes.
1492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*
1502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* 1. Current character is in defined, inactive window.
1512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* 2. Current character is in undefined window.
1522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*
*  1. Current character is in defined, inactive window
1542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*    A. Look ahead one character
1552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*    B. If next character has same index as current character,
1562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*       switch to defined window (and switch to single-byte mode)
1572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*    C. If not, just put bytes in stream
1582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*
1592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*
1602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*  2. Current character is in undefined window
1612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*    A. Look ahead two characters
1622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*    B. If both in same window as current character, define window
1632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*       (and switch to single-byte mode)
1642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*    C. If only next character in same window, just put bytes in stream
1652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*        NOTE: This costs us one extra byte.  However,
1662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*        since we have a limited number of windows to work with, it is
1672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*        assumed the cost will pay off later in savings from a window with
1682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*        more characters in it.
1692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*    D. If neither in same window, put bytes in stream
1702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*
1712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*
1722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* The following chart illustrates the bytes required for encoding characters
1732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* in each possible way
1742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*
1752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*
1762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*                                   UNICODE MODE
1772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*                                       Characters in a row with same index
1782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*               tag encountered             1       2       3       4
1792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*               ---------------------------------------------------------------
1802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*               none                        2       4       6       8
1812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*
1822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*               quote Unicode               3       6       9       12
1832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*
1842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*   window not  define window               3       4       5       6     byte
1852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*   defined                                                               cost
1862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*   window      switch to window            2       3       4       5
1872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*   defined
1882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*/
1892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerpublic final class UnicodeCompressor implements SCSU
1902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller{
1912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //==========================
1922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // Class variables
1932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //==========================
1942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
1952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /** For quick identification of a byte as a single-byte mode tag */
1962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static boolean [] sSingleTagTable = {
1972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // table generated by CompressionTableGenerator
1982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        false, true, true, true, true, true, true, true, true, false,
1992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    false, true, true, false, true, true, true, true, true, true,
2002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    true, true, true, true, true, true, true, true, true, true,
2012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    true, true, false, false, false, false, false, false,false,
2022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    false, false, false, false, false, false, false, false, false,
2032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    false, false, false, false, false, false, false, false, false,
2042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    false, false, false, false, false, false, false, false, false,
2052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    false, false, false, false, false, false, false, false, false,
2062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    false, false, false, false, false, false, false, false, false,
2072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    false, false, false, false, false, false, false, false, false,
2082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    false, false, false, false, false, false, false, false, false,
2092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    false, false, false, false, false, false, false, false, false,
2102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    false, false, false, false, false, false, false, false, false,
2112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    false, false, false, false, false, false, false, false, false,
2122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    false, false, false, false, false, false, false, false, false,
2132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    false, false, false, false, false, false, false, false, false,
2142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    false, false, false, false, false, false, false, false, false,
2152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    false, false, false, false, false, false, false, false, false,
2162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    false, false, false, false, false, false, false, false, false,
2172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    false, false, false, false, false, false, false, false, false,
2182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    false, false, false, false, false, false, false, false, false,
2192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    false, false, false, false, false, false, false, false, false,
2202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    false, false, false, false, false, false, false, false, false,
2212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    false, false, false, false, false, false, false, false, false,
2222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    false, false, false, false, false, false, false, false, false,
2232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    false, false, false, false, false, false, false, false, false,
2242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    false, false, false, false, false, false, false, false, false,
2252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    false, false, false, false, false, false, false, false, false,
2262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    false
2272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    };
2282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /** For quick identification of a byte as a unicode mode tag */
2302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static boolean [] sUnicodeTagTable = {
2312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // table generated by CompressionTableGenerator
2322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        false, false, false, false, false, false, false, false, false,
2332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    false, false, false, false, false, false, false, false, false,
2342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    false, false, false, false, false, false, false, false, false,
2352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    false, false, false, false, false, false, false, false, false,
2362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    false, false, false, false, false, false, false, false, false,
2372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    false, false, false, false, false, false, false, false, false,
2382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    false, false, false, false, false, false, false, false, false,
2392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    false, false, false, false, false, false, false, false, false,
2402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    false, false, false, false, false, false, false, false, false,
2412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    false, false, false, false, false, false, false, false, false,
2422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    false, false, false, false, false, false, false, false, false,
2432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    false, false, false, false, false, false, false, false, false,
2442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    false, false, false, false, false, false, false, false, false,
2452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    false, false, false, false, false, false, false, false, false,
2462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    false, false, false, false, false, false, false, false, false,
2472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    false, false, false, false, false, false, false, false, false,
2482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    false, false, false, false, false, false, false, false, false,
2492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    false, false, false, false, false, false, false, false, false,
2502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    false, false, false, false, false, false, false, false, false,
2512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    false, false, false, false, false, false, false, false, false,
2522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    false, false, false, false, false, false, false, false, false,
2532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    false, false, false, false, false, false, false, false, false,
2542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    false, false, false, false, false, false, false, false, false,
2552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    false, false, false, false, false, false, false, false, false,
2562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    false, false, false, false, false, false, false, false, true,
2572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    true, true, true, true, true, true, true, true, true, true,
2582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    true, true, true, true, true, true, true, true, false, false,
2592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    false, false, false, false, false, false, false, false, false,
2602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    false, false
2612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    };
2622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //==========================
2642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // Instance variables
2652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //==========================
2662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /** Alias to current dynamic window */
2682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private int       fCurrentWindow   = 0;
2692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /** Dynamic compression window offsets */
2712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private int []    fOffsets         = new int [ NUMWINDOWS ];
2722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /** Current compression mode */
2742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private int       fMode            = SINGLEBYTEMODE;
2752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /** Keeps count of times character indices are encountered */
2772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private int []    fIndexCount      = new int [ MAXINDEX + 1 ];
2782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /** The time stamps indicate when a window was last defined */
2802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private int []    fTimeStamps      = new int [ NUMWINDOWS ];
2812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /** The current time stamp */
2832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private int       fTimeStamp       = 0;
2842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
2872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Create a UnicodeCompressor.
2882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Sets all windows to their default values.
2892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @see #reset
2902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
2912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public UnicodeCompressor()
2922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    {
2932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    reset();              // initialize to defaults
2942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
2952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
2972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Compress a string into a byte array.
2982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param buffer The string to compress.
2992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return A byte array containing the compressed characters.
3002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @see #compress(char [], int, int)
3012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
3022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static byte [] compress(String buffer)
3032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    {
3042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    return compress(buffer.toCharArray(), 0, buffer.length());
3052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
3062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
3082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Compress a Unicode character array into a byte array.
3092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param buffer The character buffer to compress.
3102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param start The start of the character run to compress.
3112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param limit The limit of the character run to compress.
3122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return A byte array containing the compressed characters.
3132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @see #compress(String)
3142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
3152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static byte [] compress(char [] buffer,
3162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                   int start,
3172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                   int limit)
3182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    {
3192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    UnicodeCompressor comp = new UnicodeCompressor();
3202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // use a buffer that we know will never overflow
3222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // in the worst case, each character will take 3 bytes
3232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // to encode: UQU, hibyte, lobyte.  In this case, the
3242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // compressed data will look like: SCU, UQU, hibyte, lobyte, ...
3252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // buffer must be at least 4 bytes in size
3262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    int len = Math.max(4, 3 * (limit - start) + 1);
3272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    byte [] temp = new byte [len];
3282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    int byteCount = comp.compress(buffer, start, limit, null,
3302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                      temp, 0, len);
3312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    byte [] result = new byte [byteCount];
3332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    System.arraycopy(temp, 0, result, 0, byteCount);
3342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    return result;
3352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
3362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
3382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Compress a Unicode character array into a byte array.
3392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
3402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * This function will only consume input that can be completely
3412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * output.
3422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
3432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param charBuffer The character buffer to compress.
3442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param charBufferStart The start of the character run to compress.
3452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param charBufferLimit The limit of the character run to compress.
3462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param charsRead A one-element array.  If not null, on return
3472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * the number of characters read from charBuffer.
3482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param byteBuffer A buffer to receive the compressed data.  This
3492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * buffer must be at minimum four bytes in size.
3502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param byteBufferStart The starting offset to which to write
3512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * compressed data.
3522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param byteBufferLimit The limiting offset for writing compressed data.
3532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return The number of bytes written to byteBuffer.
3542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
3552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public int compress(char []     charBuffer,
3562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int         charBufferStart,
3572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int         charBufferLimit,
3582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int []      charsRead,
3592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            byte []     byteBuffer,
3602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int         byteBufferStart,
3612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int         byteBufferLimit)
3622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    {
3632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // the current position in the target byte buffer
3642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    int     bytePos       = byteBufferStart;
3652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // the current position in the source unicode character buffer
3672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    int     ucPos         = charBufferStart;
3682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // the current unicode character from the source buffer
3702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    int     curUC         = INVALIDCHAR;
3712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // the index for the current character
3732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int     curIndex      = -1;
3742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // look ahead
3762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    int     nextUC        = INVALIDCHAR;
3772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    int     forwardUC     = INVALIDCHAR;
3782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // temporary for window searching
3802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    int     whichWindow   = 0;
3812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // high and low bytes of the current unicode character
3832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    int     hiByte        = 0;
3842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    int     loByte        = 0;
3852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // byteBuffer must be at least 4 bytes in size
3882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    if(byteBuffer.length < 4 || (byteBufferLimit - byteBufferStart) < 4)
3892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        throw new IllegalArgumentException("byteBuffer.length < 4");
3902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    mainLoop:
3922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    while(ucPos < charBufferLimit && bytePos < byteBufferLimit) {
3932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        switch(fMode) {
3942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // main single byte mode compression loop
3952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        case SINGLEBYTEMODE:
3962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        singleByteModeLoop:
3972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        while(ucPos < charBufferLimit && bytePos < byteBufferLimit) {
3982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // get current char
3992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        curUC = charBuffer[ucPos++];
4002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // get next char
4022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(ucPos < charBufferLimit)
4032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            nextUC = charBuffer[ucPos];
4042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        else
4052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            nextUC = INVALIDCHAR;
4062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // chars less than 0x0080 (excluding tags) go straight
4082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // in stream
4092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(curUC < 0x0080) {
4102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            loByte = curUC & 0xFF;
4112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // we need to check and make sure we don't
4132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // accidentally write a single byte mode tag to
4142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // the stream unless it's quoted
4152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(sSingleTagTable[loByte]) {
4162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                // make sure there is enough room to
4172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                // write both bytes if not, rewind the
4182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                // source stream and break out
4192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if( (bytePos + 1) >= byteBufferLimit)
4202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                { --ucPos; break mainLoop; }
4212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // since we know the byte is less than 0x80, SQUOTE0
4232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // will use static window 0, or ASCII
4242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            byteBuffer[bytePos++] = (byte) SQUOTE0;
4252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
4262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            byteBuffer[bytePos++] = (byte) loByte;
4282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
4292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // if the char belongs to current window, convert it
4312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // to a byte by adding the generic compression offset
4322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // and subtracting the window's offset
4332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        else if(inDynamicWindow(curUC, fCurrentWindow) ) {
4342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            byteBuffer[bytePos++] = (byte)
4352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            (curUC - fOffsets[ fCurrentWindow ]
4362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller             + COMPRESSIONOFFSET);
4372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
4382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // if char is not in compressible range, either switch to or
4402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // quote from unicode
4412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        else if( ! isCompressible(curUC) ) {
4422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // only check next character if it is valid
4432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(nextUC != INVALIDCHAR && isCompressible(nextUC)) {
4442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                // make sure there is enough room to
4452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                // write all three bytes if not,
4462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                // rewind the source stream and break
4472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                // out
4482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if( (bytePos + 2) >= byteBufferLimit)
4492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                { --ucPos; break mainLoop; }
4502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            byteBuffer[bytePos++] = (byte) SQUOTEU;
4522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            byteBuffer[bytePos++] = (byte) (curUC >>> 8);
4532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            byteBuffer[bytePos++] = (byte) (curUC & 0xFF);
4542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
4552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            else {
4562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                // make sure there is enough room to
4572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                // write all four bytes if not, rewind
4582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                // the source stream and break out
4592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if((bytePos + 3) >= byteBufferLimit)
4602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                { --ucPos; break mainLoop; }
4612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            byteBuffer[bytePos++] = (byte) SCHANGEU;
4632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            hiByte = curUC >>> 8;
4652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            loByte = curUC & 0xFF;
4662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(sUnicodeTagTable[hiByte])
4682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // add quote Unicode tag
4692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                byteBuffer[bytePos++]   = (byte) UQUOTEU;
4702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            byteBuffer[bytePos++] = (byte) hiByte;
4722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            byteBuffer[bytePos++] = (byte) loByte;
4732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            fMode = UNICODEMODE;
4752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            break singleByteModeLoop;
4762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
4772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
4782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // if the char is in a currently defined dynamic
4802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // window, figure out which one, and either switch to
4812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // it or quote from it
4822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        else if((whichWindow = findDynamicWindow(curUC))
4832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            != INVALIDWINDOW ) {
4842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // look ahead
4852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if( (ucPos + 1) < charBufferLimit )
4862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            forwardUC = charBuffer[ucPos + 1];
4872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            else
4882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            forwardUC = INVALIDCHAR;
4892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // all three chars in same window, switch to that
4912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // window inDynamicWindow will return false for
4922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // INVALIDCHAR
4932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(inDynamicWindow(nextUC, whichWindow)
4942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller               && inDynamicWindow(forwardUC, whichWindow)) {
4952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                // make sure there is enough room to
4962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                // write both bytes if not, rewind the
4972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                // source stream and break out
4982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if( (bytePos + 1) >= byteBufferLimit)
4992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                { --ucPos; break mainLoop; }
5002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            byteBuffer[bytePos++] = (byte)(SCHANGE0 + whichWindow);
5022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            byteBuffer[bytePos++] = (byte)
5032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                (curUC - fOffsets[whichWindow]
5042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                 + COMPRESSIONOFFSET);
5052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            fTimeStamps [ whichWindow ] = ++fTimeStamp;
5062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            fCurrentWindow = whichWindow;
5072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
5082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // either only next char or neither in same
5102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // window, so quote
5112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            else {
5122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                // make sure there is enough room to
5132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                // write both bytes if not, rewind the
5142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                // source stream and break out
5152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if((bytePos + 1) >= byteBufferLimit)
5162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                { --ucPos; break mainLoop; }
5172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            byteBuffer[bytePos++] = (byte) (SQUOTE0 + whichWindow);
5192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            byteBuffer[bytePos++] = (byte)
5202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                (curUC - fOffsets[whichWindow]
5212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                 + COMPRESSIONOFFSET);
5222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
5232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
5242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // if a static window is defined, and the following
5262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // character is not in that static window, quote from
5272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // the static window Note: to quote from a static
5282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // window, don't add 0x80
5292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        else if((whichWindow = findStaticWindow(curUC))
5302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            != INVALIDWINDOW
5312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            && ! inStaticWindow(nextUC, whichWindow) ) {
5322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // make sure there is enough room to write both
5332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // bytes if not, rewind the source stream and
5342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // break out
5352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if((bytePos + 1) >= byteBufferLimit)
5362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            { --ucPos; break mainLoop; }
5372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            byteBuffer[bytePos++] = (byte) (SQUOTE0 + whichWindow);
5392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            byteBuffer[bytePos++] = (byte)
5402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            (curUC - sOffsets[whichWindow]);
5412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
5422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // if a window is not defined, decide if we want to
5442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // define a new one or switch to unicode mode
5452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        else {
5462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // determine index for current char (char is compressible)
5472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            curIndex = makeIndex(curUC);
5482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            fIndexCount[curIndex]++;
5492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // look ahead
5512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if((ucPos + 1) < charBufferLimit)
5522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            forwardUC = charBuffer[ucPos + 1];
5532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            else
5542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            forwardUC = INVALIDCHAR;
5552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // if we have encountered this index at least once
5572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // before, define a new window
5582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // OR
5592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // three chars in a row with same index, define a
5602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // new window (makeIndex will return RESERVEDINDEX
5612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // for INVALIDCHAR)
5622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if((fIndexCount[curIndex] > 1) ||
5632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller               (curIndex == makeIndex(nextUC)
5642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            && curIndex == makeIndex(forwardUC))) {
5652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // make sure there is enough room to write all
5662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // three bytes if not, rewind the source
5672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // stream and break out
5682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if( (bytePos + 2) >= byteBufferLimit)
5692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                { --ucPos; break mainLoop; }
5702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // get least recently defined window
5722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            whichWindow = getLRDefinedWindow();
5732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            byteBuffer[bytePos++] = (byte)(SDEFINE0 + whichWindow);
5752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            byteBuffer[bytePos++] = (byte) curIndex;
5762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            byteBuffer[bytePos++] = (byte)
5772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                (curUC - sOffsetTable[curIndex]
5782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                 + COMPRESSIONOFFSET);
5792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            fOffsets[whichWindow] = sOffsetTable[curIndex];
5812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            fCurrentWindow = whichWindow;
5822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            fTimeStamps [whichWindow] = ++fTimeStamp;
5832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
5842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // only two chars in a row with same index, so
5862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // switch to unicode mode (makeIndex will return
5872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // RESERVEDINDEX for INVALIDCHAR)
5882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // OR
5892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // three chars have different indices, so switch
5902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // to unicode mode
5912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            else {
5922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // make sure there is enough room to write all
5932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // four bytes if not, rewind the source stream
5942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // and break out
5952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if((bytePos + 3) >= byteBufferLimit)
5962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                { --ucPos; break mainLoop; }
5972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            byteBuffer[bytePos++] = (byte) SCHANGEU;
5992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
6002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            hiByte = curUC >>> 8;
6012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            loByte = curUC & 0xFF;
6022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
6032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(sUnicodeTagTable[hiByte])
6042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // add quote Unicode tag
6052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                byteBuffer[bytePos++] = (byte) UQUOTEU;
6062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
6072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            byteBuffer[bytePos++] = (byte) hiByte;
6082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            byteBuffer[bytePos++] = (byte) loByte;
6092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
6102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            fMode = UNICODEMODE;
6112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            break singleByteModeLoop;
6122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
6132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
6142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
6152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        break;
6162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
6172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        case UNICODEMODE:
6182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // main unicode mode compression loop
6192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        unicodeModeLoop:
6202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        while(ucPos < charBufferLimit && bytePos < byteBufferLimit) {
6212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // get current char
6222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        curUC = charBuffer[ucPos++];
6232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
6242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // get next char
6252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if( ucPos < charBufferLimit )
6262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            nextUC = charBuffer[ucPos];
6272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        else
6282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            nextUC = INVALIDCHAR;
6292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
6302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // if we have two uncompressible chars in a row,
6312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // put the current char's bytes in the stream
6322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if( ! isCompressible(curUC)
6332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            || (nextUC != INVALIDCHAR && ! isCompressible(nextUC))) {
6342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // make sure there is enough room to write all three bytes
6352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // if not, rewind the source stream and break out
6362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if( (bytePos + 2) >= byteBufferLimit)
6372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            { --ucPos; break mainLoop; }
6382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
6392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            hiByte = curUC >>> 8;
6402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            loByte = curUC & 0xFF;
6412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
6422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(sUnicodeTagTable[ hiByte ])
6432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // add quote Unicode tag
6442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            byteBuffer[bytePos++] = (byte) UQUOTEU;
6452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
6462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            byteBuffer[bytePos++] = (byte) hiByte;
6472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            byteBuffer[bytePos++] = (byte) loByte;
6482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
6492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
6502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // bytes less than 0x80 can go straight in the stream,
6512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // but in single-byte mode
6522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        else if(curUC < 0x0080) {
6532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            loByte = curUC & 0xFF;
6542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
6552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // if two chars in a row below 0x80 and the
6562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // current char is not a single-byte mode tag,
6572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // switch to single-byte mode
6582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(nextUC != INVALIDCHAR
6592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller               && nextUC < 0x0080 && ! sSingleTagTable[ loByte ] ) {
6602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                // make sure there is enough room to
6612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                // write both bytes if not, rewind the
6622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                // source stream and break out
6632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if( (bytePos + 1) >= byteBufferLimit)
6642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                { --ucPos; break mainLoop; }
6652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
6662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // use the last-active window
6672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            whichWindow = fCurrentWindow;
6682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            byteBuffer[bytePos++] = (byte)(UCHANGE0 + whichWindow);
6692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            byteBuffer[bytePos++] = (byte) loByte;
6702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
6712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            //fCurrentWindow = 0;
6722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            fTimeStamps [whichWindow] = ++fTimeStamp;
6732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            fMode = SINGLEBYTEMODE;
6742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            break unicodeModeLoop;
6752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
6762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
6772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // otherwise, just write the bytes to the stream
6782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // (this will cover the case of only 1 char less than 0x80
6792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // and single-byte mode tags)
6802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            else {
6812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                // make sure there is enough room to
6822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                // write both bytes if not, rewind the
6832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                // source stream and break out
6842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if((bytePos + 1) >= byteBufferLimit)
6852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                { --ucPos; break mainLoop; }
6862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
6872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // since the character is less than 0x80, the
6882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // high byte is always 0x00 - no need for
6892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // (curUC >>> 8)
6902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            byteBuffer[bytePos++] = (byte) 0x00;
6912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            byteBuffer[bytePos++] = (byte) loByte;
6922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
6932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
6942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
6952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // figure out if the current char is in a defined window
6962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        else if((whichWindow = findDynamicWindow(curUC))
6972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            != INVALIDWINDOW ) {
6982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // if two chars in a row in the same window,
6992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // switch to that window and go to single-byte mode
7002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // inDynamicWindow will return false for INVALIDCHAR
7012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(inDynamicWindow(nextUC, whichWindow)) {
7022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                // make sure there is enough room to
7032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                // write both bytes if not, rewind the
7042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                // source stream and break out
7052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if((bytePos + 1) >= byteBufferLimit)
7062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                { --ucPos; break mainLoop; }
7072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
7082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            byteBuffer[bytePos++] = (byte)(UCHANGE0 + whichWindow);
7092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            byteBuffer[bytePos++] = (byte)
7102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                (curUC - fOffsets[whichWindow]
7112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                 + COMPRESSIONOFFSET);
7122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
7132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            fTimeStamps [ whichWindow ] = ++fTimeStamp;
7142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            fCurrentWindow = whichWindow;
7152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            fMode = SINGLEBYTEMODE;
7162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            break unicodeModeLoop;
7172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
7182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
7192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // otherwise, just quote the unicode for the char
7202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            else {
7212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                // make sure there is enough room to
7222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                // write all three bytes if not,
7232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                // rewind the source stream and break
7242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                // out
7252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if((bytePos + 2) >= byteBufferLimit)
7262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                { --ucPos; break mainLoop; }
7272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
7282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            hiByte = curUC >>> 8;
7292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            loByte = curUC & 0xFF;
7302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
7312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(sUnicodeTagTable[ hiByte ])
7322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // add quote Unicode tag
7332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                byteBuffer[bytePos++] = (byte) UQUOTEU;
7342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
7352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            byteBuffer[bytePos++] = (byte) hiByte;
7362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            byteBuffer[bytePos++] = (byte) loByte;
7372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
7382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
7392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
7402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // char is not in a defined window
7412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        else {
7422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // determine index for current char (char is compressible)
7432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            curIndex = makeIndex(curUC);
7442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            fIndexCount[curIndex]++;
7452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
7462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // look ahead
7472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if( (ucPos + 1) < charBufferLimit )
7482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            forwardUC = charBuffer[ucPos + 1];
7492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            else
7502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            forwardUC = INVALIDCHAR;
7512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
7522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // if we have encountered this index at least once
7532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // before, define a new window for it that hasn't
7542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // previously been redefined
7552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // OR
7562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // if three chars in a row with the same index,
7572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // define a new window (makeIndex will return
7582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // RESERVEDINDEX for INVALIDCHAR)
7592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if((fIndexCount[curIndex] > 1) ||
7602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller               (curIndex == makeIndex(nextUC)
7612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            && curIndex == makeIndex(forwardUC))) {
7622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                // make sure there is enough room to
7632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                // write all three bytes if not,
7642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                // rewind the source stream and break
7652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                // out
7662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if((bytePos + 2) >= byteBufferLimit)
7672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                { --ucPos; break mainLoop; }
7682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
7692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // get least recently defined window
7702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            whichWindow = getLRDefinedWindow();
7712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
7722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            byteBuffer[bytePos++] = (byte)(UDEFINE0 + whichWindow);
7732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            byteBuffer[bytePos++] = (byte) curIndex;
7742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            byteBuffer[bytePos++] = (byte)
7752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                (curUC - sOffsetTable[curIndex]
7762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                 + COMPRESSIONOFFSET);
7772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
7782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            fOffsets[whichWindow] = sOffsetTable[curIndex];
7792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            fCurrentWindow = whichWindow;
7802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            fTimeStamps [whichWindow] = ++fTimeStamp;
7812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            fMode = SINGLEBYTEMODE;
7822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            break unicodeModeLoop;
7832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
7842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
7852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // otherwise just quote the unicode, and save our
7862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // windows for longer runs
7872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            else {
7882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                // make sure there is enough room to
7892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                // write all three bytes if not,
7902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                // rewind the source stream and break
7912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                // out
7922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if((bytePos + 2) >= byteBufferLimit)
7932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                { --ucPos; break mainLoop; }
7942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
7952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            hiByte = curUC >>> 8;
7962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            loByte = curUC & 0xFF;
7972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
7982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(sUnicodeTagTable[ hiByte ])
7992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // add quote Unicode tag
8002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                byteBuffer[bytePos++] = (byte) UQUOTEU;
8012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
8022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            byteBuffer[bytePos++] = (byte) hiByte;
8032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            byteBuffer[bytePos++] = (byte) loByte;
8042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
8052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
8062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
8072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }  // end switch
8082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
8092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
8102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // fill in output parameter
8112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    if(charsRead != null)
8122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        charsRead [0] = (ucPos - charBufferStart);
8132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
8142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // return # of bytes written
8152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return (bytePos - byteBufferStart);
8162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
8172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
8182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
8192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Reset the compressor to its initial state.
8202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
8212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public void reset()
8222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    {
8232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    int i;
8242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
8252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // reset dynamic windows
8262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        fOffsets[0] = 0x0080;    // Latin-1
8272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        fOffsets[1] = 0x00C0;    // Latin-1 Supplement + Latin Extended-A
8282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        fOffsets[2] = 0x0400;    // Cyrillic
8292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        fOffsets[3] = 0x0600;    // Arabic
8302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        fOffsets[4] = 0x0900;    // Devanagari
8312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        fOffsets[5] = 0x3040;    // Hiragana
8322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        fOffsets[6] = 0x30A0;    // Katakana
8332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        fOffsets[7] = 0xFF00;    // Fullwidth ASCII
8342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
8352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
8362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // reset time stamps
8372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for(i = 0; i < NUMWINDOWS; i++) {
8382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            fTimeStamps[i]          = 0;
8392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
8402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
8412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // reset count of seen indices
8422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for(i = 0; i <= MAXINDEX; i++ ) {
8432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            fIndexCount[i] = 0;
8442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
8452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
8462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        fTimeStamp      = 0;                // Reset current time stamp
8472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        fCurrentWindow  = 0;                // Make current window Latin-1
8482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        fMode           = SINGLEBYTEMODE;   // Always start in single-byte mode
8492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
8502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
8512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //==========================
8522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // Determine the index for a character
8532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //==========================
8542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
8552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
8562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Create the index value for a character.
8572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * For more information on this function, refer to table X-3
8582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * <A HREF="http://www.unicode.org/unicode/reports/tr6">UTR6</A>.
8592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param c The character in question.
8602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return An index for c
8612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
8622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static int makeIndex(int c)
8632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    {
8642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // check the predefined indices
8652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(c >= 0x00C0 && c < 0x0140)
8662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return LATININDEX;
8672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        else if(c >= 0x0250 && c < 0x02D0)
8682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return IPAEXTENSIONINDEX;
8692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        else if(c >= 0x0370 && c < 0x03F0)
8702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return GREEKINDEX;
8712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        else if(c >= 0x0530 && c < 0x0590)
8722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return ARMENIANINDEX;
8732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        else if(c >= 0x3040 && c < 0x30A0)
8742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return HIRAGANAINDEX;
8752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        else if(c >= 0x30A0 && c < 0x3120)
8762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return KATAKANAINDEX;
8772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        else if(c >= 0xFF60 && c < 0xFF9F)
8782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return HALFWIDTHKATAKANAINDEX;
8792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
8802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // calculate index
8812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        else if(c >= 0x0080 && c < 0x3400)
8822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return (c / 0x80) & 0xFF;
8832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        else if(c >= 0xE000 && c <= 0xFFFF)
8842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return ((c - 0xAC00) / 0x80) & 0xFF;
8852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
8862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // should never happen
8872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        else {
8882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return RESERVEDINDEX;
8892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
8902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
8912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
8922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //==========================
8932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // Check if a given character fits in a window
8942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //==========================
8952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
8962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
8972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    * Determine if a character is in a dynamic window.
8982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    * @param c The character to test
8992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    * @param whichWindow The dynamic window the test
9002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    * @return true if <TT>c</TT> will fit in <TT>whichWindow</TT>,
9012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    * false otherwise.
9022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    */
9032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private boolean inDynamicWindow(int c,
9042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    int whichWindow)
9052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    {
9062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return (c >= fOffsets[whichWindow]
9072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        && c < (fOffsets[whichWindow] + 0x80));
9082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
9092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
9102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
9112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Determine if a character is in a static window.
9122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    * @param c The character to test
9132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    * @param whichWindow The static window the test
9142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    * @return true if <TT>c</TT> will fit in <TT>whichWindow</TT>,
9152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    * false otherwise.
9162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    */
9172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static boolean inStaticWindow(int c,
9182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                      int whichWindow)
9192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    {
9202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return (c >= sOffsets[whichWindow]
9212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        && c < (sOffsets[whichWindow] + 0x80));
9222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
9232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
9242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //==========================
9252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // Check if a given character is compressible
9262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //==========================
9272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
9282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
9292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    * Determine if a character is compressible.
9302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    * @param c The character to test.
9312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    * @return true if the <TT>c</TT> is compressible, false otherwise.
9322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    */
9332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static boolean isCompressible(int c)
9342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    {
9352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return (c < 0x3400 || c >= 0xE000);
9362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
9372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
9382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //==========================
9392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // Check if a window is defined for a given character
9402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //==========================
9412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
9422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
9432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Determine if a dynamic window for a certain character is defined
9442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param c The character in question
9452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return The dynamic window containing <TT>c</TT>, or
9462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * INVALIDWINDOW if not defined.
9472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
9482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private int findDynamicWindow(int c)
9492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    {
9502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // supposedly faster to count down
9512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        //for(int i = 0; i < NUMWINDOWS; i++) {
9522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    for(int i = NUMWINDOWS - 1; i >= 0; --i) {
9532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(inDynamicWindow(c, i)) {
9542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        ++fTimeStamps[i];
9552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return i;
9562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
9572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
9582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
9592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return INVALIDWINDOW;
9602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
9612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
9622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
9632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Determine if a static window for a certain character is defined
9642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param c The character in question
9652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return The static window containing <TT>c</TT>, or
9662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * INVALIDWINDOW if not defined.
9672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
9682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static int findStaticWindow(int c)
9692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    {
9702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // supposedly faster to count down
9712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        //for(int i = 0; i < NUMSTATICWINDOWS; i++) {
9722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    for(int i = NUMSTATICWINDOWS - 1; i >= 0; --i) {
9732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(inStaticWindow(c, i)) {
9742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return i;
9752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
9762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
9772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
9782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return INVALIDWINDOW;
9792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
9802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
9812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //==========================
9822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // Find the least-recently used window
9832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //==========================
9842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
9852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /** Find the least-recently defined window */
9862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private int getLRDefinedWindow()
9872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    {
9882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int leastRU         = Integer.MAX_VALUE;
9892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int whichWindow     = INVALIDWINDOW;
9902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
9912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // find least recently used window
9922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // supposedly faster to count down
9932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        //for( int i = 0; i < NUMWINDOWS; i++ ) {
9942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for(int i = NUMWINDOWS - 1; i >= 0; --i ) {
9952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if( fTimeStamps[i] < leastRU ) {
9962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                leastRU   = fTimeStamps[i];
9972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                whichWindow  = i;
9982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
9992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
10002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
10012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return whichWindow;
10022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
10032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
10042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller}
1005