12d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert// © 2016 and later: Unicode, Inc. and others.
22d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert// License & terms of use: http://www.unicode.org/copyright.html#License
37935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/*
47935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ****************************************************************************
57935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Copyright (C) 2005-2013, International Business Machines Corporation and *
67935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * others. All Rights Reserved.                                             *
77935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ************************************************************************** *
87935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *
97935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */
107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpackage com.ibm.icu.text;
127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/**
147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * This class recognizes single-byte encodings. Because the encoding scheme is so
157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * simple, language statistics are used to do the matching.
167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */
177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertabstract class CharsetRecog_sbcs extends CharsetRecognizer {
187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
/**
 * Returns the name this recognizer reports. Declared abstract here, so each
 * concrete single-byte recognizer supplies its own value.
 * NOTE(review): presumably the charset name, e.g. "ISO-8859-1" - confirm
 * against the concrete subclasses.
 *
 * @see com.ibm.icu.text.CharsetRecognizer#getName()
 */
@Override
abstract String getName();
247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static class NGramParser
267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    {
277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert//        private static final int N_GRAM_SIZE = 3;
287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private static final int N_GRAM_MASK = 0xFFFFFF;
297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        protected int byteIndex = 0;
317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private int ngram = 0;
322d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private int[] ngramList;
347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        protected byte[] byteMap;
352d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private int ngramCount;
377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private int hitCount;
382d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        protected byte spaceChar;
402d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public NGramParser(int[] theNgramList, byte[] theByteMap)
427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {
437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            ngramList = theNgramList;
447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            byteMap   = theByteMap;
452d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            ngram = 0;
472d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            ngramCount = hitCount = 0;
497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
502d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /*
527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * Binary search for value in table, which must have exactly 64 entries.
537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         */
547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private static int search(int[] table, int value)
557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {
567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int index = 0;
572d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (table[index + 32] <= value) {
597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                index += 32;
607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
612d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (table[index + 16] <= value) {
637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                index += 16;
647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (table[index + 8] <= value) {
677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                index += 8;
687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (table[index + 4] <= value) {
717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                index += 4;
727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (table[index + 2] <= value) {
757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                index += 2;
767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (table[index + 1] <= value) {
797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                index += 1;
807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (table[index] > value) {
837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                index -= 1;
847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
852d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (index < 0 || table[index] != value) {
877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return -1;
887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
892d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return index;
917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private void lookup(int thisNgram)
947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {
957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            ngramCount += 1;
962d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (search(ngramList, thisNgram) >= 0) {
987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                hitCount += 1;
997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
1002d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
1017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1022d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
1037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        protected void addByte(int b)
1047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {
1057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            ngram = ((ngram << 8) + (b & 0xFF)) & N_GRAM_MASK;
1067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            lookup(ngram);
1077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1082d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
1097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private int nextByte(CharsetDetector det)
1107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {
1117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (byteIndex >= det.fInputLen) {
1127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return -1;
1137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
1142d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
1157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return det.fInputBytes[byteIndex++] & 0xFF;
1167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1172d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
1187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        protected void parseCharacters(CharsetDetector det)
1197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {
1207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int b;
1217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            boolean ignoreSpace = false;
1222d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
1237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            while ((b = nextByte(det)) >= 0) {
1247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                byte mb = byteMap[b];
1252d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
1267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // TODO: 0x20 might not be a space in all character sets...
1277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (mb != 0) {
1287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if (!(mb == spaceChar && ignoreSpace)) {
1292d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert                        addByte(mb);
1307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
1312d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
1327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    ignoreSpace = (mb == spaceChar);
1337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
1347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
1352d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
1367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public int parse(CharsetDetector det)
1397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {
1407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return parse (det, (byte)0x20);
1417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public int parse(CharsetDetector det, byte spaceCh)
1437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {
1442d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
1457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            this.spaceChar = spaceCh;
1462d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
1477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            parseCharacters(det);
1482d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
1497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // TODO: Is this OK? The buffer could have ended in the middle of a word...
1507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            addByte(spaceChar);
1517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            double rawPercent = (double) hitCount / (double) ngramCount;
1532d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
1547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert//                if (rawPercent <= 2.0) {
1557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert//                    return 0;
1567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert//                }
1572d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
1587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // TODO - This is a bit of a hack to take care of a case
1597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // were we were getting a confidence of 135...
1607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (rawPercent > 0.33) {
1617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return 98;
1627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
1632d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
1647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return (int) (rawPercent * 300.0);
1657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
1672d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
1687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static class NGramParser_IBM420 extends NGramParser
1697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    {
1707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private byte alef = 0x00;
1712d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
1727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        protected static byte[] unshapeMap = {
1737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/*                 -0           -1           -2           -3           -4           -5           -6           -7           -8           -9           -A           -B           -C           -D           -E           -F   */
1742d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert/* 0- */    (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40,
1752d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert/* 1- */    (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40,
1762d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert/* 2- */    (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40,
1772d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert/* 3- */    (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40,
1782d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert/* 4- */    (byte) 0x40, (byte) 0x40, (byte) 0x42, (byte) 0x42, (byte) 0x44, (byte) 0x45, (byte) 0x46, (byte) 0x47, (byte) 0x47, (byte) 0x49, (byte) 0x4A, (byte) 0x4B, (byte) 0x4C, (byte) 0x4D, (byte) 0x4E, (byte) 0x4F,
1792d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert/* 5- */    (byte) 0x50, (byte) 0x49, (byte) 0x52, (byte) 0x53, (byte) 0x54, (byte) 0x55, (byte) 0x56, (byte) 0x56, (byte) 0x58, (byte) 0x58, (byte) 0x5A, (byte) 0x5B, (byte) 0x5C, (byte) 0x5D, (byte) 0x5E, (byte) 0x5F,
1802d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert/* 6- */    (byte) 0x60, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x63, (byte) 0x65, (byte) 0x65, (byte) 0x67, (byte) 0x67, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F,
1812d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert/* 7- */    (byte) 0x69, (byte) 0x71, (byte) 0x71, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77, (byte) 0x77, (byte) 0x79, (byte) 0x7A, (byte) 0x7B, (byte) 0x7C, (byte) 0x7D, (byte) 0x7E, (byte) 0x7F,
1822d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert/* 8- */    (byte) 0x80, (byte) 0x81, (byte) 0x82, (byte) 0x83, (byte) 0x84, (byte) 0x85, (byte) 0x86, (byte) 0x87, (byte) 0x88, (byte) 0x89, (byte) 0x80, (byte) 0x8B, (byte) 0x8B, (byte) 0x8D, (byte) 0x8D, (byte) 0x8F,
1832d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert/* 9- */    (byte) 0x90, (byte) 0x91, (byte) 0x92, (byte) 0x93, (byte) 0x94, (byte) 0x95, (byte) 0x96, (byte) 0x97, (byte) 0x98, (byte) 0x99, (byte) 0x9A, (byte) 0x9A, (byte) 0x9A, (byte) 0x9A, (byte) 0x9E, (byte) 0x9E,
1842d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert/* A- */    (byte) 0x9E, (byte) 0xA1, (byte) 0xA2, (byte) 0xA3, (byte) 0xA4, (byte) 0xA5, (byte) 0xA6, (byte) 0xA7, (byte) 0xA8, (byte) 0xA9, (byte) 0x9E, (byte) 0xAB, (byte) 0xAB, (byte) 0xAD, (byte) 0xAD, (byte) 0xAF,
1852d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert/* B- */    (byte) 0xAF, (byte) 0xB1, (byte) 0xB2, (byte) 0xB3, (byte) 0xB4, (byte) 0xB5, (byte) 0xB6, (byte) 0xB7, (byte) 0xB8, (byte) 0xB9, (byte) 0xB1, (byte) 0xBB, (byte) 0xBB, (byte) 0xBD, (byte) 0xBD, (byte) 0xBF,
1862d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert/* C- */    (byte) 0xC0, (byte) 0xC1, (byte) 0xC2, (byte) 0xC3, (byte) 0xC4, (byte) 0xC5, (byte) 0xC6, (byte) 0xC7, (byte) 0xC8, (byte) 0xC9, (byte) 0xCA, (byte) 0xBF, (byte) 0xCC, (byte) 0xBF, (byte) 0xCE, (byte) 0xCF,
1872d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert/* D- */    (byte) 0xD0, (byte) 0xD1, (byte) 0xD2, (byte) 0xD3, (byte) 0xD4, (byte) 0xD5, (byte) 0xD6, (byte) 0xD7, (byte) 0xD8, (byte) 0xD9, (byte) 0xDA, (byte) 0xDA, (byte) 0xDC, (byte) 0xDC, (byte) 0xDC, (byte) 0xDF,
1882d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert/* E- */    (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7, (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF,
1892d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert/* F- */    (byte) 0xF0, (byte) 0xF1, (byte) 0xF2, (byte) 0xF3, (byte) 0xF4, (byte) 0xF5, (byte) 0xF6, (byte) 0xF7, (byte) 0xF8, (byte) 0xF9, (byte) 0xFA, (byte) 0xFB, (byte) 0xFC, (byte) 0xFD, (byte) 0xFE, (byte) 0xFF,
1907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        };
1912d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
1927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public NGramParser_IBM420(int[] theNgramList, byte[] theByteMap)
1947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {
1957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert           super(theNgramList, theByteMap);
1967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1972d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
1987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private byte isLamAlef(byte b) {
1997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             if(b == (byte)0xb2 || b == (byte)0xb3){
2007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                 return (byte)0x47;
2017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             }else if(b == (byte)0xb4 || b == (byte)0xb5){
2027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                 return (byte)0x49;
2037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             }else if(b == (byte)0xb8 || b == (byte)0xb9){
2047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                 return (byte)0x56;
2057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             }else
2067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                 return (byte)0x00;
2077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         }
2082d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
2097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /*
2107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * Arabic shaping needs to be done manually. Cannot call ArabicShaping class
2117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * because CharsetDetector is dealing with bytes not Unicode code points. We could
2127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * convert the bytes to Unicode code points but that would leave us dependent
2137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * on CharsetICU which we try to avoid. IBM420 converter amongst different versions
2147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * of JDK can produce different results and therefore is also avoided.
2157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         */
2167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         private int nextByte(CharsetDetector det)
2177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         {
2187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             if (byteIndex >= det.fInputLen || det.fInputBytes[byteIndex] == 0) {
2197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                 return -1;
2202d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert             }
2217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int next;
2222d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
2237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            alef = isLamAlef(det.fInputBytes[byteIndex]);
2247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(alef != (byte)0x00)
2257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                next = 0xB1 & 0xFF;
2267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            else
2277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                next = unshapeMap[det.fInputBytes[byteIndex]& 0xFF] & 0xFF;
2282d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
2297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            byteIndex++;
2302d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
2317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return next;
2327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         }
2332d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
2342d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert         @Override
2357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         protected void parseCharacters(CharsetDetector det)
2367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         {
2372d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert             int b;
2387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             boolean ignoreSpace = false;
2392d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
2407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             while ((b = nextByte(det)) >= 0) {
2417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                 byte mb = byteMap[b];
2422d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
2437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                 // TODO: 0x20 might not be a space in all character sets...
2447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                 if (mb != 0) {
2457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                     if (!(mb == spaceChar && ignoreSpace)) {
2462d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert                         addByte(mb);
2477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                     }
2482d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
2497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                     ignoreSpace = (mb == spaceChar);
2507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                 }
2517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                 if(alef != (byte)0x00){
2527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                     mb = byteMap[alef & 0xFF];
2532d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
2547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                     // TODO: 0x20 might not be a space in all character sets...
2557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                     if (mb != 0) {
2567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                         if (!(mb == spaceChar && ignoreSpace)) {
2572d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert                             addByte(mb);
2587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                         }
2592d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
2607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                         ignoreSpace = (mb == spaceChar);
2617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                     }
2622d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
2637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                 }
2647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             }
2657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2672d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
2682d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
2697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    int match(CharsetDetector det, int[] ngrams,  byte[] byteMap)
2707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    {
2717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return match (det, ngrams, byteMap, (byte)0x20);
2727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2732d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
2747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    int match(CharsetDetector det, int[] ngrams,  byte[] byteMap, byte spaceChar)
2757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    {
2767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        NGramParser parser = new NGramParser(ngrams, byteMap);
2777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return parser.parse(det, spaceChar);
2787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2792d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
2807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    int matchIBM420(CharsetDetector det, int[] ngrams,  byte[] byteMap, byte spaceChar){
2817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        NGramParser_IBM420 parser = new NGramParser_IBM420(ngrams, byteMap);
2827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return parser.parse(det, spaceChar);
2837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2842d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
2857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static class NGramsPlusLang {
2867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int[] fNGrams;
2877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        String  fLang;
2887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        NGramsPlusLang(String la, int [] ng) {
2897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            fLang   = la;
2907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            fNGrams = ng;
2917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static class CharsetRecog_8859_1 extends CharsetRecog_sbcs
2957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    {
2967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        protected static byte[] byteMap = {
2972d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
2982d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
2992d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
3002d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
3012d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x00,
3022d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
3032d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
3042d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
3052d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67,
3062d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F,
3072d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77,
3082d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
3092d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67,
3102d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F,
3112d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77,
3122d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
3132d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
3142d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
3152d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
3162d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
3172d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
3182d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0xAA, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
3192d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0xB5, (byte) 0x20, (byte) 0x20,
3202d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0xBA, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
3212d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7,
3222d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF,
3232d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0xF0, (byte) 0xF1, (byte) 0xF2, (byte) 0xF3, (byte) 0xF4, (byte) 0xF5, (byte) 0xF6, (byte) 0x20,
3242d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0xF8, (byte) 0xF9, (byte) 0xFA, (byte) 0xFB, (byte) 0xFC, (byte) 0xFD, (byte) 0xFE, (byte) 0xDF,
3252d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7,
3262d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF,
3272d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0xF0, (byte) 0xF1, (byte) 0xF2, (byte) 0xF3, (byte) 0xF4, (byte) 0xF5, (byte) 0xF6, (byte) 0x20,
3282d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0xF8, (byte) 0xF9, (byte) 0xFA, (byte) 0xFB, (byte) 0xFC, (byte) 0xFD, (byte) 0xFE, (byte) 0xFF,
3297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        };
3302d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
3312d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
3327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private static NGramsPlusLang[] ngrams_8859_1 = new NGramsPlusLang[] {
3337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            new NGramsPlusLang(
3342d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert                    "da",
3357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    new int[] {
3362d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert                            0x206166, 0x206174, 0x206465, 0x20656E, 0x206572, 0x20666F, 0x206861, 0x206920, 0x206D65, 0x206F67, 0x2070E5, 0x207369, 0x207374, 0x207469, 0x207669, 0x616620,
3372d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert                            0x616E20, 0x616E64, 0x617220, 0x617420, 0x646520, 0x64656E, 0x646572, 0x646574, 0x652073, 0x656420, 0x656465, 0x656E20, 0x656E64, 0x657220, 0x657265, 0x657320,
3382d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert                            0x657420, 0x666F72, 0x676520, 0x67656E, 0x676572, 0x696765, 0x696C20, 0x696E67, 0x6B6520, 0x6B6B65, 0x6C6572, 0x6C6967, 0x6C6C65, 0x6D6564, 0x6E6465, 0x6E6520,
3392d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert                            0x6E6720, 0x6E6765, 0x6F6720, 0x6F6D20, 0x6F7220, 0x70E520, 0x722064, 0x722065, 0x722073, 0x726520, 0x737465, 0x742073, 0x746520, 0x746572, 0x74696C, 0x766572,
3407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }),
3417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            new NGramsPlusLang(
3427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    "de",
3437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    new int[] {
3442d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert                            0x20616E, 0x206175, 0x206265, 0x206461, 0x206465, 0x206469, 0x206569, 0x206765, 0x206861, 0x20696E, 0x206D69, 0x207363, 0x207365, 0x20756E, 0x207665, 0x20766F,
3452d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert                            0x207765, 0x207A75, 0x626572, 0x636820, 0x636865, 0x636874, 0x646173, 0x64656E, 0x646572, 0x646965, 0x652064, 0x652073, 0x65696E, 0x656974, 0x656E20, 0x657220,
3462d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert                            0x657320, 0x67656E, 0x68656E, 0x687420, 0x696368, 0x696520, 0x696E20, 0x696E65, 0x697420, 0x6C6963, 0x6C6C65, 0x6E2061, 0x6E2064, 0x6E2073, 0x6E6420, 0x6E6465,
3472d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert                            0x6E6520, 0x6E6720, 0x6E6765, 0x6E7465, 0x722064, 0x726465, 0x726569, 0x736368, 0x737465, 0x742064, 0x746520, 0x74656E, 0x746572, 0x756E64, 0x756E67, 0x766572,
3487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }),
3497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            new NGramsPlusLang(
3507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    "en",
3517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    new int[] {
3522d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert                            0x206120, 0x20616E, 0x206265, 0x20636F, 0x20666F, 0x206861, 0x206865, 0x20696E, 0x206D61, 0x206F66, 0x207072, 0x207265, 0x207361, 0x207374, 0x207468, 0x20746F,
3532d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert                            0x207768, 0x616964, 0x616C20, 0x616E20, 0x616E64, 0x617320, 0x617420, 0x617465, 0x617469, 0x642061, 0x642074, 0x652061, 0x652073, 0x652074, 0x656420, 0x656E74,
3542d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert                            0x657220, 0x657320, 0x666F72, 0x686174, 0x686520, 0x686572, 0x696420, 0x696E20, 0x696E67, 0x696F6E, 0x697320, 0x6E2061, 0x6E2074, 0x6E6420, 0x6E6720, 0x6E7420,
3552d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert                            0x6F6620, 0x6F6E20, 0x6F7220, 0x726520, 0x727320, 0x732061, 0x732074, 0x736169, 0x737420, 0x742074, 0x746572, 0x746861, 0x746865, 0x74696F, 0x746F20, 0x747320,
3567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }),
3577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            new NGramsPlusLang(
3597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    "es",
3607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    new int[] {
3612d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert                            0x206120, 0x206361, 0x20636F, 0x206465, 0x20656C, 0x20656E, 0x206573, 0x20696E, 0x206C61, 0x206C6F, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207265, 0x207365,
3622d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert                            0x20756E, 0x207920, 0x612063, 0x612064, 0x612065, 0x61206C, 0x612070, 0x616369, 0x61646F, 0x616C20, 0x617220, 0x617320, 0x6369F3, 0x636F6E, 0x646520, 0x64656C,
3632d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert                            0x646F20, 0x652064, 0x652065, 0x65206C, 0x656C20, 0x656E20, 0x656E74, 0x657320, 0x657374, 0x69656E, 0x69F36E, 0x6C6120, 0x6C6F73, 0x6E2065, 0x6E7465, 0x6F2064,
3642d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert                            0x6F2065, 0x6F6E20, 0x6F7220, 0x6F7320, 0x706172, 0x717565, 0x726120, 0x726573, 0x732064, 0x732065, 0x732070, 0x736520, 0x746520, 0x746F20, 0x756520, 0xF36E20,
3657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }),
3662d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
3677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            new NGramsPlusLang(
3687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    "fr",
3697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    new int[] {
3702d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert                            0x206175, 0x20636F, 0x206461, 0x206465, 0x206475, 0x20656E, 0x206574, 0x206C61, 0x206C65, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207365, 0x20736F, 0x20756E,
3712d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert                            0x20E020, 0x616E74, 0x617469, 0x636520, 0x636F6E, 0x646520, 0x646573, 0x647520, 0x652061, 0x652063, 0x652064, 0x652065, 0x65206C, 0x652070, 0x652073, 0x656E20,
3722d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert                            0x656E74, 0x657220, 0x657320, 0x657420, 0x657572, 0x696F6E, 0x697320, 0x697420, 0x6C6120, 0x6C6520, 0x6C6573, 0x6D656E, 0x6E2064, 0x6E6520, 0x6E7320, 0x6E7420,
3737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            0x6F6E20, 0x6F6E74, 0x6F7572, 0x717565, 0x72206C, 0x726520, 0x732061, 0x732064, 0x732065, 0x73206C, 0x732070, 0x742064, 0x746520, 0x74696F, 0x756520, 0x757220,
3747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }),
3757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            new NGramsPlusLang(
3777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    "it",
3787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    new int[] {
3792d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert                            0x20616C, 0x206368, 0x20636F, 0x206465, 0x206469, 0x206520, 0x20696C, 0x20696E, 0x206C61, 0x207065, 0x207072, 0x20756E, 0x612063, 0x612064, 0x612070, 0x612073,
3802d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert                            0x61746F, 0x636865, 0x636F6E, 0x64656C, 0x646920, 0x652061, 0x652063, 0x652064, 0x652069, 0x65206C, 0x652070, 0x652073, 0x656C20, 0x656C6C, 0x656E74, 0x657220,
3812d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert                            0x686520, 0x692061, 0x692063, 0x692064, 0x692073, 0x696120, 0x696C20, 0x696E20, 0x696F6E, 0x6C6120, 0x6C6520, 0x6C6920, 0x6C6C61, 0x6E6520, 0x6E6920, 0x6E6F20,
3822d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert                            0x6E7465, 0x6F2061, 0x6F2064, 0x6F2069, 0x6F2073, 0x6F6E20, 0x6F6E65, 0x706572, 0x726120, 0x726520, 0x736920, 0x746120, 0x746520, 0x746920, 0x746F20, 0x7A696F,
3837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }),
3842d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
3857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            new NGramsPlusLang(
3867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    "nl",
3877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    new int[] {
3882d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert                            0x20616C, 0x206265, 0x206461, 0x206465, 0x206469, 0x206565, 0x20656E, 0x206765, 0x206865, 0x20696E, 0x206D61, 0x206D65, 0x206F70, 0x207465, 0x207661, 0x207665,
3892d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert                            0x20766F, 0x207765, 0x207A69, 0x61616E, 0x616172, 0x616E20, 0x616E64, 0x617220, 0x617420, 0x636874, 0x646520, 0x64656E, 0x646572, 0x652062, 0x652076, 0x65656E,
3902d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert                            0x656572, 0x656E20, 0x657220, 0x657273, 0x657420, 0x67656E, 0x686574, 0x696520, 0x696E20, 0x696E67, 0x697320, 0x6E2062, 0x6E2064, 0x6E2065, 0x6E2068, 0x6E206F,
3912d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert                            0x6E2076, 0x6E6465, 0x6E6720, 0x6F6E64, 0x6F6F72, 0x6F7020, 0x6F7220, 0x736368, 0x737465, 0x742064, 0x746520, 0x74656E, 0x746572, 0x76616E, 0x766572, 0x766F6F,
3927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }),
3932d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
3947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            new NGramsPlusLang(
3957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    "no",
3967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    new int[] {
3972d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert                            0x206174, 0x206176, 0x206465, 0x20656E, 0x206572, 0x20666F, 0x206861, 0x206920, 0x206D65, 0x206F67, 0x2070E5, 0x207365, 0x20736B, 0x20736F, 0x207374, 0x207469,
3982d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert                            0x207669, 0x20E520, 0x616E64, 0x617220, 0x617420, 0x646520, 0x64656E, 0x646574, 0x652073, 0x656420, 0x656E20, 0x656E65, 0x657220, 0x657265, 0x657420, 0x657474,
3992d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert                            0x666F72, 0x67656E, 0x696B6B, 0x696C20, 0x696E67, 0x6B6520, 0x6B6B65, 0x6C6520, 0x6C6C65, 0x6D6564, 0x6D656E, 0x6E2073, 0x6E6520, 0x6E6720, 0x6E6765, 0x6E6E65,
4002d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert                            0x6F6720, 0x6F6D20, 0x6F7220, 0x70E520, 0x722073, 0x726520, 0x736F6D, 0x737465, 0x742073, 0x746520, 0x74656E, 0x746572, 0x74696C, 0x747420, 0x747465, 0x766572,
4017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }),
4022d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
4037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            new NGramsPlusLang(
4047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    "pt",
4057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    new int[] {
4062d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert                            0x206120, 0x20636F, 0x206461, 0x206465, 0x20646F, 0x206520, 0x206573, 0x206D61, 0x206E6F, 0x206F20, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207265, 0x207365,
4072d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert                            0x20756D, 0x612061, 0x612063, 0x612064, 0x612070, 0x616465, 0x61646F, 0x616C20, 0x617220, 0x617261, 0x617320, 0x636F6D, 0x636F6E, 0x646120, 0x646520, 0x646F20,
4082d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert                            0x646F73, 0x652061, 0x652064, 0x656D20, 0x656E74, 0x657320, 0x657374, 0x696120, 0x696361, 0x6D656E, 0x6E7465, 0x6E746F, 0x6F2061, 0x6F2063, 0x6F2064, 0x6F2065,
4092d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert                            0x6F2070, 0x6F7320, 0x706172, 0x717565, 0x726120, 0x726573, 0x732061, 0x732064, 0x732065, 0x732070, 0x737461, 0x746520, 0x746F20, 0x756520, 0xE36F20, 0xE7E36F,
4107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }),
4122d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
4137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            new NGramsPlusLang(
4147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    "sv",
4157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    new int[] {
4162d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert                            0x206174, 0x206176, 0x206465, 0x20656E, 0x2066F6, 0x206861, 0x206920, 0x20696E, 0x206B6F, 0x206D65, 0x206F63, 0x2070E5, 0x20736B, 0x20736F, 0x207374, 0x207469,
4172d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert                            0x207661, 0x207669, 0x20E472, 0x616465, 0x616E20, 0x616E64, 0x617220, 0x617474, 0x636820, 0x646520, 0x64656E, 0x646572, 0x646574, 0x656420, 0x656E20, 0x657220,
4182d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert                            0x657420, 0x66F672, 0x67656E, 0x696C6C, 0x696E67, 0x6B6120, 0x6C6C20, 0x6D6564, 0x6E2073, 0x6E6120, 0x6E6465, 0x6E6720, 0x6E6765, 0x6E696E, 0x6F6368, 0x6F6D20,
4192d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert                            0x6F6E20, 0x70E520, 0x722061, 0x722073, 0x726120, 0x736B61, 0x736F6D, 0x742073, 0x746120, 0x746520, 0x746572, 0x74696C, 0x747420, 0x766172, 0xE47220, 0xF67220,
4207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }),
4212d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
4227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        };
4237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4242d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
4252d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert        @Override
4267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public CharsetMatch match(CharsetDetector det)
4277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {
4287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            String name = det.fC1Bytes ? "windows-1252" : "ISO-8859-1";
4297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int bestConfidenceSoFar = -1;
4307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            String lang = null;
4317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            for (NGramsPlusLang ngl: ngrams_8859_1) {
4327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int confidence = match(det, ngl.fNGrams, byteMap);
4337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (confidence > bestConfidenceSoFar) {
4347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    bestConfidenceSoFar = confidence;
4357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    lang = ngl.fLang;
4367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
4377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
4387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return bestConfidenceSoFar <= 0 ? null : new CharsetMatch(det, this, bestConfidenceSoFar, name, lang);
4397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
4407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4412d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
4422d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert        @Override
4437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public String getName()
4447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {
4457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return "ISO-8859-1";
4467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
4477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
4487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4492d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
4507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static class CharsetRecog_8859_2 extends CharsetRecog_sbcs
4517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    {
4527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        protected static byte[] byteMap = {
4532d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
4542d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
4552d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
4562d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
4572d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x00,
4582d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
4592d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
4602d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
4612d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67,
4622d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F,
4632d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77,
4642d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
4652d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67,
4662d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F,
4672d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77,
4682d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
4692d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
4702d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
4712d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
4722d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
4732d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0xB1, (byte) 0x20, (byte) 0xB3, (byte) 0x20, (byte) 0xB5, (byte) 0xB6, (byte) 0x20,
4742d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0xB9, (byte) 0xBA, (byte) 0xBB, (byte) 0xBC, (byte) 0x20, (byte) 0xBE, (byte) 0xBF,
4752d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0xB1, (byte) 0x20, (byte) 0xB3, (byte) 0x20, (byte) 0xB5, (byte) 0xB6, (byte) 0xB7,
4762d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0xB9, (byte) 0xBA, (byte) 0xBB, (byte) 0xBC, (byte) 0x20, (byte) 0xBE, (byte) 0xBF,
4772d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7,
4782d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF,
4792d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0xF0, (byte) 0xF1, (byte) 0xF2, (byte) 0xF3, (byte) 0xF4, (byte) 0xF5, (byte) 0xF6, (byte) 0x20,
4802d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0xF8, (byte) 0xF9, (byte) 0xFA, (byte) 0xFB, (byte) 0xFC, (byte) 0xFD, (byte) 0xFE, (byte) 0xDF,
4812d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7,
4822d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF,
4832d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0xF0, (byte) 0xF1, (byte) 0xF2, (byte) 0xF3, (byte) 0xF4, (byte) 0xF5, (byte) 0xF6, (byte) 0x20,
4842d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0xF8, (byte) 0xF9, (byte) 0xFA, (byte) 0xFB, (byte) 0xFC, (byte) 0xFD, (byte) 0xFE, (byte) 0x20,
4857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        };
4867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private static NGramsPlusLang[] ngrams_8859_2 = new NGramsPlusLang[] {
4887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            new NGramsPlusLang(
4892d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert                    "cs",
4907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    new int[] {
4912d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert                            0x206120, 0x206279, 0x20646F, 0x206A65, 0x206E61, 0x206E65, 0x206F20, 0x206F64, 0x20706F, 0x207072, 0x2070F8, 0x20726F, 0x207365, 0x20736F, 0x207374, 0x20746F,
4922d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert                            0x207620, 0x207679, 0x207A61, 0x612070, 0x636520, 0x636820, 0x652070, 0x652073, 0x652076, 0x656D20, 0x656EED, 0x686F20, 0x686F64, 0x697374, 0x6A6520, 0x6B7465,
4932d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert                            0x6C6520, 0x6C6920, 0x6E6120, 0x6EE920, 0x6EEC20, 0x6EED20, 0x6F2070, 0x6F646E, 0x6F6A69, 0x6F7374, 0x6F7520, 0x6F7661, 0x706F64, 0x706F6A, 0x70726F, 0x70F865,
4942d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert                            0x736520, 0x736F75, 0x737461, 0x737469, 0x73746E, 0x746572, 0x746EED, 0x746F20, 0x752070, 0xBE6520, 0xE16EED, 0xE9686F, 0xED2070, 0xED2073, 0xED6D20, 0xF86564,
4957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }),
4967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            new NGramsPlusLang(
4972d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert                    "hu",
4987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    new int[] {
4992d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert                            0x206120, 0x20617A, 0x206265, 0x206567, 0x20656C, 0x206665, 0x206861, 0x20686F, 0x206973, 0x206B65, 0x206B69, 0x206BF6, 0x206C65, 0x206D61, 0x206D65, 0x206D69,
5002d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert                            0x206E65, 0x20737A, 0x207465, 0x20E973, 0x612061, 0x61206B, 0x61206D, 0x612073, 0x616B20, 0x616E20, 0x617A20, 0x62616E, 0x62656E, 0x656779, 0x656B20, 0x656C20,
5012d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert                            0x656C65, 0x656D20, 0x656E20, 0x657265, 0x657420, 0x657465, 0x657474, 0x677920, 0x686F67, 0x696E74, 0x697320, 0x6B2061, 0x6BF67A, 0x6D6567, 0x6D696E, 0x6E2061,
5022d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert                            0x6E616B, 0x6E656B, 0x6E656D, 0x6E7420, 0x6F6779, 0x732061, 0x737A65, 0x737A74, 0x737AE1, 0x73E967, 0x742061, 0x747420, 0x74E173, 0x7A6572, 0xE16E20, 0xE97320,
5037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }),
5047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            new NGramsPlusLang(
5052d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert                    "pl",
5067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    new int[] {
5072d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert                            0x20637A, 0x20646F, 0x206920, 0x206A65, 0x206B6F, 0x206D61, 0x206D69, 0x206E61, 0x206E69, 0x206F64, 0x20706F, 0x207072, 0x207369, 0x207720, 0x207769, 0x207779,
5082d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert                            0x207A20, 0x207A61, 0x612070, 0x612077, 0x616E69, 0x636820, 0x637A65, 0x637A79, 0x646F20, 0x647A69, 0x652070, 0x652073, 0x652077, 0x65207A, 0x65676F, 0x656A20,
5092d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert                            0x656D20, 0x656E69, 0x676F20, 0x696120, 0x696520, 0x69656A, 0x6B6120, 0x6B6920, 0x6B6965, 0x6D6965, 0x6E6120, 0x6E6961, 0x6E6965, 0x6F2070, 0x6F7761, 0x6F7769,
5102d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert                            0x706F6C, 0x707261, 0x70726F, 0x70727A, 0x727A65, 0x727A79, 0x7369EA, 0x736B69, 0x737461, 0x776965, 0x796368, 0x796D20, 0x7A6520, 0x7A6965, 0x7A7920, 0xF37720,
5117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }),
5127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            new NGramsPlusLang(
5132d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert                    "ro",
5147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    new int[] {
5152d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert                            0x206120, 0x206163, 0x206361, 0x206365, 0x20636F, 0x206375, 0x206465, 0x206469, 0x206C61, 0x206D61, 0x207065, 0x207072, 0x207365, 0x2073E3, 0x20756E, 0x20BA69,
5162d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert                            0x20EE6E, 0x612063, 0x612064, 0x617265, 0x617420, 0x617465, 0x617520, 0x636172, 0x636F6E, 0x637520, 0x63E320, 0x646520, 0x652061, 0x652063, 0x652064, 0x652070,
5172d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert                            0x652073, 0x656120, 0x656920, 0x656C65, 0x656E74, 0x657374, 0x692061, 0x692063, 0x692064, 0x692070, 0x696520, 0x696920, 0x696E20, 0x6C6120, 0x6C6520, 0x6C6F72,
5182d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert                            0x6C7569, 0x6E6520, 0x6E7472, 0x6F7220, 0x70656E, 0x726520, 0x726561, 0x727520, 0x73E320, 0x746520, 0x747275, 0x74E320, 0x756920, 0x756C20, 0xBA6920, 0xEE6E20,
5197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    })
5207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        };
5217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5222d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert        @Override
5237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public CharsetMatch match(CharsetDetector det)
5247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {
5257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            String name = det.fC1Bytes ? "windows-1250" : "ISO-8859-2";
5267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int bestConfidenceSoFar = -1;
5277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            String lang = null;
5287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            for (NGramsPlusLang ngl: ngrams_8859_2) {
5297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int confidence = match(det, ngl.fNGrams, byteMap);
5307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (confidence > bestConfidenceSoFar) {
5317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    bestConfidenceSoFar = confidence;
5327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    lang = ngl.fLang;
5337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
5347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
5357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return bestConfidenceSoFar <= 0 ? null : new CharsetMatch(det, this, bestConfidenceSoFar, name, lang);
5367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
5377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5382d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert        @Override
5397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public String getName()
5407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {
5417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return "ISO-8859-2";
5427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
5437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
5452d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
5462d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
5477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    abstract static class CharsetRecog_8859_5 extends CharsetRecog_sbcs
5487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    {
5497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        protected static byte[] byteMap = {
5502d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
5512d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
5522d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
5532d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
5542d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x00,
5552d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
5562d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
5572d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
5582d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67,
5592d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F,
5602d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77,
5612d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
5622d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67,
5632d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F,
5642d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77,
5652d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
5662d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
5672d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
5682d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
5692d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
5702d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0xF1, (byte) 0xF2, (byte) 0xF3, (byte) 0xF4, (byte) 0xF5, (byte) 0xF6, (byte) 0xF7,
5712d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0xF8, (byte) 0xF9, (byte) 0xFA, (byte) 0xFB, (byte) 0xFC, (byte) 0x20, (byte) 0xFE, (byte) 0xFF,
5722d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0xD0, (byte) 0xD1, (byte) 0xD2, (byte) 0xD3, (byte) 0xD4, (byte) 0xD5, (byte) 0xD6, (byte) 0xD7,
5732d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0xD8, (byte) 0xD9, (byte) 0xDA, (byte) 0xDB, (byte) 0xDC, (byte) 0xDD, (byte) 0xDE, (byte) 0xDF,
5742d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7,
5752d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF,
5762d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0xD0, (byte) 0xD1, (byte) 0xD2, (byte) 0xD3, (byte) 0xD4, (byte) 0xD5, (byte) 0xD6, (byte) 0xD7,
5772d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0xD8, (byte) 0xD9, (byte) 0xDA, (byte) 0xDB, (byte) 0xDC, (byte) 0xDD, (byte) 0xDE, (byte) 0xDF,
5782d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7,
5792d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF,
5802d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0xF1, (byte) 0xF2, (byte) 0xF3, (byte) 0xF4, (byte) 0xF5, (byte) 0xF6, (byte) 0xF7,
5812d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0xF8, (byte) 0xF9, (byte) 0xFA, (byte) 0xFB, (byte) 0xFC, (byte) 0x20, (byte) 0xFE, (byte) 0xFF,
5827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        };
5837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5842d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert        @Override
5857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public String getName()
5867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {
5877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return "ISO-8859-5";
5887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
5897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
5902d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
5917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static class CharsetRecog_8859_5_ru extends CharsetRecog_8859_5
5927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    {
5937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private static int[] ngrams = {
5942d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            0x20D220, 0x20D2DE, 0x20D4DE, 0x20D7D0, 0x20D820, 0x20DAD0, 0x20DADE, 0x20DDD0, 0x20DDD5, 0x20DED1, 0x20DFDE, 0x20DFE0, 0x20E0D0, 0x20E1DE, 0x20E1E2, 0x20E2DE,
5952d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            0x20E7E2, 0x20EDE2, 0xD0DDD8, 0xD0E2EC, 0xD3DE20, 0xD5DBEC, 0xD5DDD8, 0xD5E1E2, 0xD5E220, 0xD820DF, 0xD8D520, 0xD8D820, 0xD8EF20, 0xDBD5DD, 0xDBD820, 0xDBECDD,
5962d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            0xDDD020, 0xDDD520, 0xDDD8D5, 0xDDD8EF, 0xDDDE20, 0xDDDED2, 0xDE20D2, 0xDE20DF, 0xDE20E1, 0xDED220, 0xDED2D0, 0xDED3DE, 0xDED920, 0xDEDBEC, 0xDEDC20, 0xDEE1E2,
5972d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            0xDFDEDB, 0xDFE0D5, 0xDFE0D8, 0xDFE0DE, 0xE0D0D2, 0xE0D5D4, 0xE1E2D0, 0xE1E2D2, 0xE1E2D8, 0xE1EF20, 0xE2D5DB, 0xE2DE20, 0xE2DEE0, 0xE2EC20, 0xE7E2DE, 0xEBE520,
5987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        };
5997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6002d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert        @Override
6017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public String getLanguage()
6027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {
6037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return "ru";
6047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
6052d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
6062d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert        @Override
6077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public CharsetMatch match(CharsetDetector det)
6087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {
6097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int confidence = match(det, ngrams, byteMap);
6107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return confidence == 0 ? null : new CharsetMatch(det, this, confidence);
6117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
6127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
6132d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
6147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    abstract static class CharsetRecog_8859_6 extends CharsetRecog_sbcs
6157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    {
6167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        protected static byte[] byteMap = {
6172d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
6182d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
6192d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
6202d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
6212d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x00,
6222d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
6232d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
6242d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
6252d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67,
6262d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F,
6272d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77,
6282d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
6292d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67,
6302d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F,
6312d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77,
6322d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
6332d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
6342d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
6352d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
6362d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
6372d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
6382d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
6392d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
6402d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
6412d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0xC1, (byte) 0xC2, (byte) 0xC3, (byte) 0xC4, (byte) 0xC5, (byte) 0xC6, (byte) 0xC7,
6422d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0xC8, (byte) 0xC9, (byte) 0xCA, (byte) 0xCB, (byte) 0xCC, (byte) 0xCD, (byte) 0xCE, (byte) 0xCF,
6432d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0xD0, (byte) 0xD1, (byte) 0xD2, (byte) 0xD3, (byte) 0xD4, (byte) 0xD5, (byte) 0xD6, (byte) 0xD7,
6442d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0xD8, (byte) 0xD9, (byte) 0xDA, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
6452d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7,
6462d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
6472d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
6482d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
6497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        };
6507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6512d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert        @Override
6527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public String getName()
6537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {
6547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return "ISO-8859-6";
6557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
6567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
6572d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
6587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static class CharsetRecog_8859_6_ar extends CharsetRecog_8859_6
6597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    {
6607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private static int[] ngrams = {
6612d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            0x20C7E4, 0x20C7E6, 0x20C8C7, 0x20D9E4, 0x20E1EA, 0x20E4E4, 0x20E5E6, 0x20E8C7, 0xC720C7, 0xC7C120, 0xC7CA20, 0xC7D120, 0xC7E420, 0xC7E4C3, 0xC7E4C7, 0xC7E4C8,
6622d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            0xC7E4CA, 0xC7E4CC, 0xC7E4CD, 0xC7E4CF, 0xC7E4D3, 0xC7E4D9, 0xC7E4E2, 0xC7E4E5, 0xC7E4E8, 0xC7E4EA, 0xC7E520, 0xC7E620, 0xC7E6CA, 0xC820C7, 0xC920C7, 0xC920E1,
6632d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            0xC920E4, 0xC920E5, 0xC920E8, 0xCA20C7, 0xCF20C7, 0xCFC920, 0xD120C7, 0xD1C920, 0xD320C7, 0xD920C7, 0xD9E4E9, 0xE1EA20, 0xE420C7, 0xE4C920, 0xE4E920, 0xE4EA20,
6642d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            0xE520C7, 0xE5C720, 0xE5C920, 0xE5E620, 0xE620C7, 0xE720C7, 0xE7C720, 0xE8C7E4, 0xE8E620, 0xE920C7, 0xEA20C7, 0xEA20E5, 0xEA20E8, 0xEAC920, 0xEAD120, 0xEAE620,
6657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        };
6667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6672d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert        @Override
6687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public String getLanguage()
6697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {
6707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return "ar";
6717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
6722d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
6732d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert        @Override
6747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public CharsetMatch match(CharsetDetector det)
6757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {
6767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int confidence = match(det, ngrams, byteMap);
6777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return confidence == 0 ? null : new CharsetMatch(det, this, confidence);
6787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
6797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
6802d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
6817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    abstract static class CharsetRecog_8859_7 extends CharsetRecog_sbcs
6827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    {
6837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        protected static byte[] byteMap = {
6842d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
6852d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
6862d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
6872d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
6882d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x00,
6892d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
6902d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
6912d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
6922d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67,
6932d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F,
6942d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77,
6952d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
6962d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67,
6972d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F,
6982d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77,
6992d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
7002d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
7012d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
7022d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
7032d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
7042d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0xA1, (byte) 0xA2, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
7052d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
7062d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0xDC, (byte) 0x20,
7072d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0xDD, (byte) 0xDE, (byte) 0xDF, (byte) 0x20, (byte) 0xFC, (byte) 0x20, (byte) 0xFD, (byte) 0xFE,
7082d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0xC0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7,
7092d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF,
7102d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0xF0, (byte) 0xF1, (byte) 0x20, (byte) 0xF3, (byte) 0xF4, (byte) 0xF5, (byte) 0xF6, (byte) 0xF7,
7112d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0xF8, (byte) 0xF9, (byte) 0xFA, (byte) 0xFB, (byte) 0xDC, (byte) 0xDD, (byte) 0xDE, (byte) 0xDF,
7122d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7,
7132d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF,
7142d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0xF0, (byte) 0xF1, (byte) 0xF2, (byte) 0xF3, (byte) 0xF4, (byte) 0xF5, (byte) 0xF6, (byte) 0xF7,
7152d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0xF8, (byte) 0xF9, (byte) 0xFA, (byte) 0xFB, (byte) 0xFC, (byte) 0xFD, (byte) 0xFE, (byte) 0x20,
7167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        };
7177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7182d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert        @Override
7197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public String getName()
7207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {
7217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return "ISO-8859-7";
7227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
7237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
7242d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
7257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static class CharsetRecog_8859_7_el extends CharsetRecog_8859_7
7267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    {
7277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private static int[] ngrams = {
7282d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            0x20E1ED, 0x20E1F0, 0x20E3E9, 0x20E4E9, 0x20E5F0, 0x20E720, 0x20EAE1, 0x20ECE5, 0x20EDE1, 0x20EF20, 0x20F0E1, 0x20F0EF, 0x20F0F1, 0x20F3F4, 0x20F3F5, 0x20F4E7,
7292d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            0x20F4EF, 0xDFE120, 0xE120E1, 0xE120F4, 0xE1E920, 0xE1ED20, 0xE1F0FC, 0xE1F220, 0xE3E9E1, 0xE5E920, 0xE5F220, 0xE720F4, 0xE7ED20, 0xE7F220, 0xE920F4, 0xE9E120,
7302d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            0xE9EADE, 0xE9F220, 0xEAE1E9, 0xEAE1F4, 0xECE520, 0xED20E1, 0xED20E5, 0xED20F0, 0xEDE120, 0xEFF220, 0xEFF520, 0xF0EFF5, 0xF0F1EF, 0xF0FC20, 0xF220E1, 0xF220E5,
7312d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            0xF220EA, 0xF220F0, 0xF220F4, 0xF3E520, 0xF3E720, 0xF3F4EF, 0xF4E120, 0xF4E1E9, 0xF4E7ED, 0xF4E7F2, 0xF4E9EA, 0xF4EF20, 0xF4EFF5, 0xF4F9ED, 0xF9ED20, 0xFEED20,
7327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        };
7337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7342d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert        @Override
7357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public String getLanguage()
7367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {
7377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return "el";
7387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
7392d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
7402d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert        @Override
7417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public CharsetMatch match(CharsetDetector det)
7427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {
7437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            String name = det.fC1Bytes ?  "windows-1253" : "ISO-8859-7";
7447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int confidence = match(det, ngrams, byteMap);
7457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return confidence == 0 ? null : new CharsetMatch(det, this, confidence, name, "el");
7467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
7477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
7482d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
7497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    abstract static class CharsetRecog_8859_8 extends CharsetRecog_sbcs
7507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    {
7517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        protected static byte[] byteMap = {
7522d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
7532d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
7542d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
7552d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
7562d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x00,
7572d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
7582d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
7592d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
7602d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67,
7612d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F,
7622d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77,
7632d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
7642d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67,
7652d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F,
7662d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77,
7672d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
7682d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
7692d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
7702d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
7712d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
7722d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
7732d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
7742d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0xB5, (byte) 0x20, (byte) 0x20,
7752d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
7762d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
7772d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
7782d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
7792d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
7802d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7,
7812d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF,
7822d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0xF0, (byte) 0xF1, (byte) 0xF2, (byte) 0xF3, (byte) 0xF4, (byte) 0xF5, (byte) 0xF6, (byte) 0xF7,
7832d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0xF8, (byte) 0xF9, (byte) 0xFA, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
7847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        };
7857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7862d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert        @Override
7877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public String getName()
7887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {
7897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return "ISO-8859-8";
7907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
7917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
7922d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
7937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static class CharsetRecog_8859_8_I_he extends CharsetRecog_8859_8
7947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    {
7957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private static int[] ngrams = {
7962d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            0x20E0E5, 0x20E0E7, 0x20E0E9, 0x20E0FA, 0x20E1E9, 0x20E1EE, 0x20E4E0, 0x20E4E5, 0x20E4E9, 0x20E4EE, 0x20E4F2, 0x20E4F9, 0x20E4FA, 0x20ECE0, 0x20ECE4, 0x20EEE0,
7972d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            0x20F2EC, 0x20F9EC, 0xE0FA20, 0xE420E0, 0xE420E1, 0xE420E4, 0xE420EC, 0xE420EE, 0xE420F9, 0xE4E5E0, 0xE5E020, 0xE5ED20, 0xE5EF20, 0xE5F820, 0xE5FA20, 0xE920E4,
7982d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            0xE9E420, 0xE9E5FA, 0xE9E9ED, 0xE9ED20, 0xE9EF20, 0xE9F820, 0xE9FA20, 0xEC20E0, 0xEC20E4, 0xECE020, 0xECE420, 0xED20E0, 0xED20E1, 0xED20E4, 0xED20EC, 0xED20EE,
7992d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            0xED20F9, 0xEEE420, 0xEF20E4, 0xF0E420, 0xF0E920, 0xF0E9ED, 0xF2EC20, 0xF820E4, 0xF8E9ED, 0xF9EC20, 0xFA20E0, 0xFA20E1, 0xFA20E4, 0xFA20EC, 0xFA20EE, 0xFA20F9,
8007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        };
8017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
8022d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert        @Override
8037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public String getName()
8047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {
8057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return "ISO-8859-8-I";
8067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
8077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
8082d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert        @Override
8097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public String getLanguage()
8107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {
8117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return "he";
8127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
8132d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
8142d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert        @Override
8157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public CharsetMatch match(CharsetDetector det)
8167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {
8177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            String name = det.fC1Bytes ? "windows-1255" : "ISO-8859-8-I";
8187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int confidence = match(det, ngrams, byteMap);
8197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return confidence == 0 ? null : new CharsetMatch(det, this, confidence, name, "he");
8207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
8217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
8222d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
8237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static class CharsetRecog_8859_8_he extends CharsetRecog_8859_8
8247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    {
8257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private static int[] ngrams = {
8262d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            0x20E0E5, 0x20E0EC, 0x20E4E9, 0x20E4EC, 0x20E4EE, 0x20E4F0, 0x20E9F0, 0x20ECF2, 0x20ECF9, 0x20EDE5, 0x20EDE9, 0x20EFE5, 0x20EFE9, 0x20F8E5, 0x20F8E9, 0x20FAE0,
8272d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            0x20FAE5, 0x20FAE9, 0xE020E4, 0xE020EC, 0xE020ED, 0xE020FA, 0xE0E420, 0xE0E5E4, 0xE0EC20, 0xE0EE20, 0xE120E4, 0xE120ED, 0xE120FA, 0xE420E4, 0xE420E9, 0xE420EC,
8282d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            0xE420ED, 0xE420EF, 0xE420F8, 0xE420FA, 0xE4EC20, 0xE5E020, 0xE5E420, 0xE7E020, 0xE9E020, 0xE9E120, 0xE9E420, 0xEC20E4, 0xEC20ED, 0xEC20FA, 0xECF220, 0xECF920,
8292d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            0xEDE9E9, 0xEDE9F0, 0xEDE9F8, 0xEE20E4, 0xEE20ED, 0xEE20FA, 0xEEE120, 0xEEE420, 0xF2E420, 0xF920E4, 0xF920ED, 0xF920FA, 0xF9E420, 0xFAE020, 0xFAE420, 0xFAE5E9,
8307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        };
8317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
8322d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert        @Override
8337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public String getLanguage()
8347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {
8357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return "he";
8367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
8372d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
8382d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert        @Override
8397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public CharsetMatch match(CharsetDetector det)
8407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {
8417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            String name = det.fC1Bytes ? "windows-1255" : "ISO-8859-8";
8427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int confidence = match(det, ngrams, byteMap);
8437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return confidence == 0 ? null : new CharsetMatch(det, this, confidence, name, "he");
8447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
8457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
8467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
8472d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
8487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    abstract static class CharsetRecog_8859_9 extends CharsetRecog_sbcs
8497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    {
8507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        protected static byte[] byteMap = {
8512d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
8522d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
8532d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
8542d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
8552d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x00,
8562d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
8572d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
8582d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
8592d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67,
8602d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F,
8612d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77,
8622d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
8632d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67,
8642d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F,
8652d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77,
8662d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
8672d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
8682d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
8692d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
8702d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
8712d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
8722d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0xAA, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
8732d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0xB5, (byte) 0x20, (byte) 0x20,
8742d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0xBA, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
8752d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7,
8762d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF,
8772d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0xF0, (byte) 0xF1, (byte) 0xF2, (byte) 0xF3, (byte) 0xF4, (byte) 0xF5, (byte) 0xF6, (byte) 0x20,
8782d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0xF8, (byte) 0xF9, (byte) 0xFA, (byte) 0xFB, (byte) 0xFC, (byte) 0x69, (byte) 0xFE, (byte) 0xDF,
8792d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7,
8802d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF,
8812d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0xF0, (byte) 0xF1, (byte) 0xF2, (byte) 0xF3, (byte) 0xF4, (byte) 0xF5, (byte) 0xF6, (byte) 0x20,
8822d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0xF8, (byte) 0xF9, (byte) 0xFA, (byte) 0xFB, (byte) 0xFC, (byte) 0xFD, (byte) 0xFE, (byte) 0xFF,
8837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        };
8847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
8852d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert        @Override
8867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public String getName()
8877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {
8887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return "ISO-8859-9";
8897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
8907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
8912d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
8927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static class CharsetRecog_8859_9_tr extends CharsetRecog_8859_9
8937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    {
8947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private static int[] ngrams = {
8952d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            0x206261, 0x206269, 0x206275, 0x206461, 0x206465, 0x206765, 0x206861, 0x20696C, 0x206B61, 0x206B6F, 0x206D61, 0x206F6C, 0x207361, 0x207461, 0x207665, 0x207961,
8962d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            0x612062, 0x616B20, 0x616C61, 0x616D61, 0x616E20, 0x616EFD, 0x617220, 0x617261, 0x6172FD, 0x6173FD, 0x617961, 0x626972, 0x646120, 0x646520, 0x646920, 0x652062,
8972d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            0x65206B, 0x656469, 0x656E20, 0x657220, 0x657269, 0x657369, 0x696C65, 0x696E20, 0x696E69, 0x697220, 0x6C616E, 0x6C6172, 0x6C6520, 0x6C6572, 0x6E2061, 0x6E2062,
8982d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            0x6E206B, 0x6E6461, 0x6E6465, 0x6E6520, 0x6E6920, 0x6E696E, 0x6EFD20, 0x72696E, 0x72FD6E, 0x766520, 0x796120, 0x796F72, 0xFD6E20, 0xFD6E64, 0xFD6EFD, 0xFDF0FD,
8997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        };
9007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9012d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert        @Override
9027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public String getLanguage()
9037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {
9047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return "tr";
9057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
9062d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
9072d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert        @Override
9087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public CharsetMatch match(CharsetDetector det)
9097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {
9107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            String name = det.fC1Bytes ? "windows-1254" : "ISO-8859-9";
9117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int confidence = match(det, ngrams, byteMap);
9127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return confidence == 0 ? null : new CharsetMatch(det, this, confidence, name, "tr");
9137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
9147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
9152d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
9167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static class CharsetRecog_windows_1251 extends CharsetRecog_sbcs
9177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    {
9187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private static int[] ngrams = {
9192d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            0x20E220, 0x20E2EE, 0x20E4EE, 0x20E7E0, 0x20E820, 0x20EAE0, 0x20EAEE, 0x20EDE0, 0x20EDE5, 0x20EEE1, 0x20EFEE, 0x20EFF0, 0x20F0E0, 0x20F1EE, 0x20F1F2, 0x20F2EE,
9202d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            0x20F7F2, 0x20FDF2, 0xE0EDE8, 0xE0F2FC, 0xE3EE20, 0xE5EBFC, 0xE5EDE8, 0xE5F1F2, 0xE5F220, 0xE820EF, 0xE8E520, 0xE8E820, 0xE8FF20, 0xEBE5ED, 0xEBE820, 0xEBFCED,
9212d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            0xEDE020, 0xEDE520, 0xEDE8E5, 0xEDE8FF, 0xEDEE20, 0xEDEEE2, 0xEE20E2, 0xEE20EF, 0xEE20F1, 0xEEE220, 0xEEE2E0, 0xEEE3EE, 0xEEE920, 0xEEEBFC, 0xEEEC20, 0xEEF1F2,
9222d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            0xEFEEEB, 0xEFF0E5, 0xEFF0E8, 0xEFF0EE, 0xF0E0E2, 0xF0E5E4, 0xF1F2E0, 0xF1F2E2, 0xF1F2E8, 0xF1FF20, 0xF2E5EB, 0xF2EE20, 0xF2EEF0, 0xF2FC20, 0xF7F2EE, 0xFBF520,
9237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        };
9247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private static byte[] byteMap = {
9262d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
9272d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
9282d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
9292d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
9302d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x00,
9312d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
9322d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
9332d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
9342d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67,
9352d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F,
9362d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77,
9372d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
9382d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67,
9392d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F,
9402d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77,
9412d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
9422d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x90, (byte) 0x83, (byte) 0x20, (byte) 0x83, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
9432d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x9A, (byte) 0x20, (byte) 0x9C, (byte) 0x9D, (byte) 0x9E, (byte) 0x9F,
9442d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x90, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
9452d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x9A, (byte) 0x20, (byte) 0x9C, (byte) 0x9D, (byte) 0x9E, (byte) 0x9F,
9462d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0xA2, (byte) 0xA2, (byte) 0xBC, (byte) 0x20, (byte) 0xB4, (byte) 0x20, (byte) 0x20,
9472d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0xB8, (byte) 0x20, (byte) 0xBA, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0xBF,
9482d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0xB3, (byte) 0xB3, (byte) 0xB4, (byte) 0xB5, (byte) 0x20, (byte) 0x20,
9492d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0xB8, (byte) 0x20, (byte) 0xBA, (byte) 0x20, (byte) 0xBC, (byte) 0xBE, (byte) 0xBE, (byte) 0xBF,
9502d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7,
9512d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF,
9522d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0xF0, (byte) 0xF1, (byte) 0xF2, (byte) 0xF3, (byte) 0xF4, (byte) 0xF5, (byte) 0xF6, (byte) 0xF7,
9532d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0xF8, (byte) 0xF9, (byte) 0xFA, (byte) 0xFB, (byte) 0xFC, (byte) 0xFD, (byte) 0xFE, (byte) 0xFF,
9542d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7,
9552d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF,
9562d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0xF0, (byte) 0xF1, (byte) 0xF2, (byte) 0xF3, (byte) 0xF4, (byte) 0xF5, (byte) 0xF6, (byte) 0xF7,
9572d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0xF8, (byte) 0xF9, (byte) 0xFA, (byte) 0xFB, (byte) 0xFC, (byte) 0xFD, (byte) 0xFE, (byte) 0xFF,
9587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        };
9597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9602d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert        @Override
9617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public String getName()
9627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {
9637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return  "windows-1251";
9647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
9652d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
9662d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert        @Override
9677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public String getLanguage()
9687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {
9697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return "ru";
9707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
9712d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
9722d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert        @Override
9737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public CharsetMatch match(CharsetDetector det)
9747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {
9757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int confidence = match(det, ngrams, byteMap);
9767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return confidence == 0 ? null : new CharsetMatch(det, this, confidence);
9777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
9787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
9792d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
9807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static class CharsetRecog_windows_1256 extends CharsetRecog_sbcs
9817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    {
9827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private static int[] ngrams = {
9832d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            0x20C7E1, 0x20C7E4, 0x20C8C7, 0x20DAE1, 0x20DDED, 0x20E1E1, 0x20E3E4, 0x20E6C7, 0xC720C7, 0xC7C120, 0xC7CA20, 0xC7D120, 0xC7E120, 0xC7E1C3, 0xC7E1C7, 0xC7E1C8,
9842d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            0xC7E1CA, 0xC7E1CC, 0xC7E1CD, 0xC7E1CF, 0xC7E1D3, 0xC7E1DA, 0xC7E1DE, 0xC7E1E3, 0xC7E1E6, 0xC7E1ED, 0xC7E320, 0xC7E420, 0xC7E4CA, 0xC820C7, 0xC920C7, 0xC920DD,
9852d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            0xC920E1, 0xC920E3, 0xC920E6, 0xCA20C7, 0xCF20C7, 0xCFC920, 0xD120C7, 0xD1C920, 0xD320C7, 0xDA20C7, 0xDAE1EC, 0xDDED20, 0xE120C7, 0xE1C920, 0xE1EC20, 0xE1ED20,
9862d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            0xE320C7, 0xE3C720, 0xE3C920, 0xE3E420, 0xE420C7, 0xE520C7, 0xE5C720, 0xE6C7E1, 0xE6E420, 0xEC20C7, 0xED20C7, 0xED20E3, 0xED20E6, 0xEDC920, 0xEDD120, 0xEDE420,
9877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        };
9887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private static byte[] byteMap = {
9902d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
9912d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
9922d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
9932d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
9942d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x00,
9952d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
9962d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
9972d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
9982d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67,
9992d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F,
10002d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77,
10012d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
10022d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67,
10032d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F,
10042d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77,
10052d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
10062d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x81, (byte) 0x20, (byte) 0x83, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
10072d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x88, (byte) 0x20, (byte) 0x8A, (byte) 0x20, (byte) 0x9C, (byte) 0x8D, (byte) 0x8E, (byte) 0x8F,
10082d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x90, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
10092d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x98, (byte) 0x20, (byte) 0x9A, (byte) 0x20, (byte) 0x9C, (byte) 0x20, (byte) 0x20, (byte) 0x9F,
10102d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
10112d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0xAA, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
10122d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0xB5, (byte) 0x20, (byte) 0x20,
10132d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
10142d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0xC0, (byte) 0xC1, (byte) 0xC2, (byte) 0xC3, (byte) 0xC4, (byte) 0xC5, (byte) 0xC6, (byte) 0xC7,
10152d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0xC8, (byte) 0xC9, (byte) 0xCA, (byte) 0xCB, (byte) 0xCC, (byte) 0xCD, (byte) 0xCE, (byte) 0xCF,
10162d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0xD0, (byte) 0xD1, (byte) 0xD2, (byte) 0xD3, (byte) 0xD4, (byte) 0xD5, (byte) 0xD6, (byte) 0x20,
10172d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0xD8, (byte) 0xD9, (byte) 0xDA, (byte) 0xDB, (byte) 0xDC, (byte) 0xDD, (byte) 0xDE, (byte) 0xDF,
10182d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7,
10192d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF,
10202d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0xF4, (byte) 0x20, (byte) 0x20, (byte) 0x20,
10212d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0xF9, (byte) 0x20, (byte) 0xFB, (byte) 0xFC, (byte) 0x20, (byte) 0x20, (byte) 0xFF,
10227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        };
10237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
10242d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert        @Override
10257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public String getName()
10267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {
10277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return  "windows-1256";
10287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
10292d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
10302d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert        @Override
10317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public String getLanguage()
10327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {
10337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return "ar";
10347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
10352d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
10362d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert        @Override
10377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public CharsetMatch match(CharsetDetector det)
10387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {
10397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int confidence = match(det, ngrams, byteMap);
10407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return confidence == 0 ? null : new CharsetMatch(det, this, confidence);
10417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
10427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
10432d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
10447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static class CharsetRecog_KOI8_R extends CharsetRecog_sbcs
10457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    {
10467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private static int[] ngrams = {
10472d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            0x20C4CF, 0x20C920, 0x20CBC1, 0x20CBCF, 0x20CEC1, 0x20CEC5, 0x20CFC2, 0x20D0CF, 0x20D0D2, 0x20D2C1, 0x20D3CF, 0x20D3D4, 0x20D4CF, 0x20D720, 0x20D7CF, 0x20DAC1,
10482d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            0x20DCD4, 0x20DED4, 0xC1CEC9, 0xC1D4D8, 0xC5CCD8, 0xC5CEC9, 0xC5D3D4, 0xC5D420, 0xC7CF20, 0xC920D0, 0xC9C520, 0xC9C920, 0xC9D120, 0xCCC5CE, 0xCCC920, 0xCCD8CE,
10492d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            0xCEC120, 0xCEC520, 0xCEC9C5, 0xCEC9D1, 0xCECF20, 0xCECFD7, 0xCF20D0, 0xCF20D3, 0xCF20D7, 0xCFC7CF, 0xCFCA20, 0xCFCCD8, 0xCFCD20, 0xCFD3D4, 0xCFD720, 0xCFD7C1,
10502d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            0xD0CFCC, 0xD0D2C5, 0xD0D2C9, 0xD0D2CF, 0xD2C1D7, 0xD2C5C4, 0xD3D120, 0xD3D4C1, 0xD3D4C9, 0xD3D4D7, 0xD4C5CC, 0xD4CF20, 0xD4CFD2, 0xD4D820, 0xD9C820, 0xDED4CF,
10517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        };
10527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
10537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private static byte[] byteMap = {
10542d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
10552d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
10562d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
10572d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
10582d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x00,
10592d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
10602d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
10612d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
10622d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67,
10632d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F,
10642d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77,
10652d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
10662d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67,
10672d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F,
10682d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77,
10692d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
10702d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
10712d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
10722d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
10732d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
10742d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0xA3, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
10752d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
10762d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0xA3, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
10772d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
10782d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0xC0, (byte) 0xC1, (byte) 0xC2, (byte) 0xC3, (byte) 0xC4, (byte) 0xC5, (byte) 0xC6, (byte) 0xC7,
10792d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0xC8, (byte) 0xC9, (byte) 0xCA, (byte) 0xCB, (byte) 0xCC, (byte) 0xCD, (byte) 0xCE, (byte) 0xCF,
10802d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0xD0, (byte) 0xD1, (byte) 0xD2, (byte) 0xD3, (byte) 0xD4, (byte) 0xD5, (byte) 0xD6, (byte) 0xD7,
10812d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0xD8, (byte) 0xD9, (byte) 0xDA, (byte) 0xDB, (byte) 0xDC, (byte) 0xDD, (byte) 0xDE, (byte) 0xDF,
10822d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0xC0, (byte) 0xC1, (byte) 0xC2, (byte) 0xC3, (byte) 0xC4, (byte) 0xC5, (byte) 0xC6, (byte) 0xC7,
10832d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0xC8, (byte) 0xC9, (byte) 0xCA, (byte) 0xCB, (byte) 0xCC, (byte) 0xCD, (byte) 0xCE, (byte) 0xCF,
10842d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0xD0, (byte) 0xD1, (byte) 0xD2, (byte) 0xD3, (byte) 0xD4, (byte) 0xD5, (byte) 0xD6, (byte) 0xD7,
10852d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            (byte) 0xD8, (byte) 0xD9, (byte) 0xDA, (byte) 0xDB, (byte) 0xDC, (byte) 0xDD, (byte) 0xDE, (byte) 0xDF,
10867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        };
10872d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
10882d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert        @Override
10897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public String getName()
10907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {
10917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return  "KOI8-R";
10927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
10932d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
10942d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert        @Override
10957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public String getLanguage()
10967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {
10977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return "ru";
10987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
10992d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
11002d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert        @Override
11017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public CharsetMatch match(CharsetDetector det)
11027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {
11037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int confidence = match(det, ngrams, byteMap);
11047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return confidence == 0 ? null : new CharsetMatch(det, this, confidence);
11057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
11067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
11072d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
11087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    abstract static class CharsetRecog_IBM424_he extends CharsetRecog_sbcs
11097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    {
11107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        protected static byte[] byteMap = {
11117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/*                 -0           -1           -2           -3           -4           -5           -6           -7           -8           -9           -A           -B           -C           -D           -E           -F   */
11122d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert/* 0- */    (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40,
11132d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert/* 1- */    (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40,
11142d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert/* 2- */    (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40,
11152d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert/* 3- */    (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40,
11162d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert/* 4- */    (byte) 0x40, (byte) 0x41, (byte) 0x42, (byte) 0x43, (byte) 0x44, (byte) 0x45, (byte) 0x46, (byte) 0x47, (byte) 0x48, (byte) 0x49, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40,
11172d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert/* 5- */    (byte) 0x40, (byte) 0x51, (byte) 0x52, (byte) 0x53, (byte) 0x54, (byte) 0x55, (byte) 0x56, (byte) 0x57, (byte) 0x58, (byte) 0x59, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40,
11182d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert/* 6- */    (byte) 0x40, (byte) 0x40, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67, (byte) 0x68, (byte) 0x69, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40,
11192d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert/* 7- */    (byte) 0x40, (byte) 0x71, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x00, (byte) 0x40, (byte) 0x40,
11202d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert/* 8- */    (byte) 0x40, (byte) 0x81, (byte) 0x82, (byte) 0x83, (byte) 0x84, (byte) 0x85, (byte) 0x86, (byte) 0x87, (byte) 0x88, (byte) 0x89, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40,
11212d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert/* 9- */    (byte) 0x40, (byte) 0x91, (byte) 0x92, (byte) 0x93, (byte) 0x94, (byte) 0x95, (byte) 0x96, (byte) 0x97, (byte) 0x98, (byte) 0x99, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40,
11222d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert/* A- */    (byte) 0xA0, (byte) 0x40, (byte) 0xA2, (byte) 0xA3, (byte) 0xA4, (byte) 0xA5, (byte) 0xA6, (byte) 0xA7, (byte) 0xA8, (byte) 0xA9, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40,
11232d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert/* B- */    (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40,
11242d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert/* C- */    (byte) 0x40, (byte) 0x81, (byte) 0x82, (byte) 0x83, (byte) 0x84, (byte) 0x85, (byte) 0x86, (byte) 0x87, (byte) 0x88, (byte) 0x89, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40,
11252d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert/* D- */    (byte) 0x40, (byte) 0x91, (byte) 0x92, (byte) 0x93, (byte) 0x94, (byte) 0x95, (byte) 0x96, (byte) 0x97, (byte) 0x98, (byte) 0x99, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40,
11262d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert/* E- */    (byte) 0x40, (byte) 0x40, (byte) 0xA2, (byte) 0xA3, (byte) 0xA4, (byte) 0xA5, (byte) 0xA6, (byte) 0xA7, (byte) 0xA8, (byte) 0xA9, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40,
11272d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert/* F- */    (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40,
11287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        };
11297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
11302d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert        @Override
11317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public String getLanguage()
11327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {
11337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return "he";
11347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
11357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
11362d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert    static class CharsetRecog_IBM424_he_rtl extends CharsetRecog_IBM424_he
11377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    {
11382d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert        @Override
11397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public String getName()
11407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {
11417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return "IBM424_rtl";
11427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
11437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private static int[] ngrams = {
11442d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            0x404146, 0x404148, 0x404151, 0x404171, 0x404251, 0x404256, 0x404541, 0x404546, 0x404551, 0x404556, 0x404562, 0x404569, 0x404571, 0x405441, 0x405445, 0x405641,
11452d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            0x406254, 0x406954, 0x417140, 0x454041, 0x454042, 0x454045, 0x454054, 0x454056, 0x454069, 0x454641, 0x464140, 0x465540, 0x465740, 0x466840, 0x467140, 0x514045,
11462d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            0x514540, 0x514671, 0x515155, 0x515540, 0x515740, 0x516840, 0x517140, 0x544041, 0x544045, 0x544140, 0x544540, 0x554041, 0x554042, 0x554045, 0x554054, 0x554056,
11472d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            0x554069, 0x564540, 0x574045, 0x584540, 0x585140, 0x585155, 0x625440, 0x684045, 0x685155, 0x695440, 0x714041, 0x714042, 0x714045, 0x714054, 0x714056, 0x714069,
11487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        };
11492d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert        @Override
11507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public CharsetMatch match(CharsetDetector det)
11517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {
11527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int confidence = match(det, ngrams, byteMap, (byte)0x40);
11537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return confidence == 0 ? null : new CharsetMatch(det, this, confidence);
11547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
11557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
11562d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert    static class CharsetRecog_IBM424_he_ltr extends CharsetRecog_IBM424_he
11577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    {
11582d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert        @Override
11597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public String getName()
11607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {
11617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return "IBM424_ltr";
11627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
11637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private static int[] ngrams = {
11647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            0x404146, 0x404154, 0x404551, 0x404554, 0x404556, 0x404558, 0x405158, 0x405462, 0x405469, 0x405546, 0x405551, 0x405746, 0x405751, 0x406846, 0x406851, 0x407141,
11657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            0x407146, 0x407151, 0x414045, 0x414054, 0x414055, 0x414071, 0x414540, 0x414645, 0x415440, 0x415640, 0x424045, 0x424055, 0x424071, 0x454045, 0x454051, 0x454054,
11667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            0x454055, 0x454057, 0x454068, 0x454071, 0x455440, 0x464140, 0x464540, 0x484140, 0x514140, 0x514240, 0x514540, 0x544045, 0x544055, 0x544071, 0x546240, 0x546940,
11677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            0x555151, 0x555158, 0x555168, 0x564045, 0x564055, 0x564071, 0x564240, 0x564540, 0x624540, 0x694045, 0x694055, 0x694071, 0x694540, 0x714140, 0x714540, 0x714651
11687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
11697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        };
11702d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert        @Override
11717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public CharsetMatch match(CharsetDetector det)
11727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {
11737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int confidence = match(det, ngrams, byteMap, (byte)0x40);
11747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return confidence == 0 ? null : new CharsetMatch(det, this, confidence);
11757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
11767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
11772d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
11787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    abstract static class CharsetRecog_IBM420_ar extends CharsetRecog_sbcs
11797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    {
11807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
11817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        protected static byte[] byteMap = {
11827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/*                 -0           -1           -2           -3           -4           -5           -6           -7           -8           -9           -A           -B           -C           -D           -E           -F   */
11832d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert/* 0- */    (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40,
11842d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert/* 1- */    (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40,
11852d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert/* 2- */    (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40,
11862d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert/* 3- */    (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40,
11872d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert/* 4- */    (byte) 0x40, (byte) 0x40, (byte) 0x42, (byte) 0x43, (byte) 0x44, (byte) 0x45, (byte) 0x46, (byte) 0x47, (byte) 0x48, (byte) 0x49, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40,
11882d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert/* 5- */    (byte) 0x40, (byte) 0x51, (byte) 0x52, (byte) 0x40, (byte) 0x40, (byte) 0x55, (byte) 0x56, (byte) 0x57, (byte) 0x58, (byte) 0x59, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40,
11892d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert/* 6- */    (byte) 0x40, (byte) 0x40, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67, (byte) 0x68, (byte) 0x69, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40,
11902d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert/* 7- */    (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77, (byte) 0x78, (byte) 0x79, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40,
11912d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert/* 8- */    (byte) 0x80, (byte) 0x81, (byte) 0x82, (byte) 0x83, (byte) 0x84, (byte) 0x85, (byte) 0x86, (byte) 0x87, (byte) 0x88, (byte) 0x89, (byte) 0x8A, (byte) 0x8B, (byte) 0x8C, (byte) 0x8D, (byte) 0x8E, (byte) 0x8F,
11922d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert/* 9- */    (byte) 0x90, (byte) 0x91, (byte) 0x92, (byte) 0x93, (byte) 0x94, (byte) 0x95, (byte) 0x96, (byte) 0x97, (byte) 0x98, (byte) 0x99, (byte) 0x9A, (byte) 0x9B, (byte) 0x9C, (byte) 0x9D, (byte) 0x9E, (byte) 0x9F,
11932d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert/* A- */    (byte) 0xA0, (byte) 0x40, (byte) 0xA2, (byte) 0xA3, (byte) 0xA4, (byte) 0xA5, (byte) 0xA6, (byte) 0xA7, (byte) 0xA8, (byte) 0xA9, (byte) 0xAA, (byte) 0xAB, (byte) 0xAC, (byte) 0xAD, (byte) 0xAE, (byte) 0xAF,
11942d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert/* B- */    (byte) 0xB0, (byte) 0xB1, (byte) 0xB2, (byte) 0xB3, (byte) 0xB4, (byte) 0xB5, (byte) 0x40, (byte) 0x40, (byte) 0xB8, (byte) 0xB9, (byte) 0xBA, (byte) 0xBB, (byte) 0xBC, (byte) 0xBD, (byte) 0xBE, (byte) 0xBF,
11952d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert/* C- */    (byte) 0x40, (byte) 0x81, (byte) 0x82, (byte) 0x83, (byte) 0x84, (byte) 0x85, (byte) 0x86, (byte) 0x87, (byte) 0x88, (byte) 0x89, (byte) 0x40, (byte) 0xCB, (byte) 0x40, (byte) 0xCD, (byte) 0x40, (byte) 0xCF,
11962d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert/* D- */    (byte) 0x40, (byte) 0x91, (byte) 0x92, (byte) 0x93, (byte) 0x94, (byte) 0x95, (byte) 0x96, (byte) 0x97, (byte) 0x98, (byte) 0x99, (byte) 0xDA, (byte) 0xDB, (byte) 0xDC, (byte) 0xDD, (byte) 0xDE, (byte) 0xDF,
11972d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert/* E- */    (byte) 0x40, (byte) 0x40, (byte) 0xA2, (byte) 0xA3, (byte) 0xA4, (byte) 0xA5, (byte) 0xA6, (byte) 0xA7, (byte) 0xA8, (byte) 0xA9, (byte) 0xEA, (byte) 0xEB, (byte) 0x40, (byte) 0xED, (byte) 0xEE, (byte) 0xEF,
11982d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert/* F- */    (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0xFB, (byte) 0xFC, (byte) 0xFD, (byte) 0xFE, (byte) 0x40,
11997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        };
12007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
12012d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
12022d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert        @Override
12037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public String getLanguage()
12047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {
12057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return "ar";
12067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
12072d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
12087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
12092d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert    static class CharsetRecog_IBM420_ar_rtl extends CharsetRecog_IBM420_ar
12107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    {
12117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private static int[] ngrams = {
12127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            0x4056B1, 0x4056BD, 0x405856, 0x409AB1, 0x40ABDC, 0x40B1B1, 0x40BBBD, 0x40CF56, 0x564056, 0x564640, 0x566340, 0x567540, 0x56B140, 0x56B149, 0x56B156, 0x56B158,
12137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            0x56B163, 0x56B167, 0x56B169, 0x56B173, 0x56B178, 0x56B19A, 0x56B1AD, 0x56B1BB, 0x56B1CF, 0x56B1DC, 0x56BB40, 0x56BD40, 0x56BD63, 0x584056, 0x624056, 0x6240AB,
12147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            0x6240B1, 0x6240BB, 0x6240CF, 0x634056, 0x734056, 0x736240, 0x754056, 0x756240, 0x784056, 0x9A4056, 0x9AB1DA, 0xABDC40, 0xB14056, 0xB16240, 0xB1DA40, 0xB1DC40,
12157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            0xBB4056, 0xBB5640, 0xBB6240, 0xBBBD40, 0xBD4056, 0xBF4056, 0xBF5640, 0xCF56B1, 0xCFBD40, 0xDA4056, 0xDC4056, 0xDC40BB, 0xDC40CF, 0xDC6240, 0xDC7540, 0xDCBD40,
12167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        };
12177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
12182d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert        @Override
12197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public String getName()
12207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {
12217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return "IBM420_rtl";
12227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
12232d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert        @Override
12247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public CharsetMatch match(CharsetDetector det)
12257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {
12267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int confidence =  matchIBM420(det, ngrams, byteMap, (byte)0x40);
12277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return confidence == 0 ? null : new CharsetMatch(det, this, confidence);
12287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
12292d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
12307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
12312d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert    static class CharsetRecog_IBM420_ar_ltr extends CharsetRecog_IBM420_ar
12327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    {
12337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private static int[] ngrams = {
12342d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            0x404656, 0x4056BB, 0x4056BF, 0x406273, 0x406275, 0x4062B1, 0x4062BB, 0x4062DC, 0x406356, 0x407556, 0x4075DC, 0x40B156, 0x40BB56, 0x40BD56, 0x40BDBB, 0x40BDCF,
12357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            0x40BDDC, 0x40DAB1, 0x40DCAB, 0x40DCB1, 0x49B156, 0x564056, 0x564058, 0x564062, 0x564063, 0x564073, 0x564075, 0x564078, 0x56409A, 0x5640B1, 0x5640BB, 0x5640BD,
12367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            0x5640BF, 0x5640DA, 0x5640DC, 0x565840, 0x56B156, 0x56CF40, 0x58B156, 0x63B156, 0x63BD56, 0x67B156, 0x69B156, 0x73B156, 0x78B156, 0x9AB156, 0xAB4062, 0xADB156,
12377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            0xB14062, 0xB15640, 0xB156CF, 0xB19A40, 0xB1B140, 0xBB4062, 0xBB40DC, 0xBBB156, 0xBD5640, 0xBDBB40, 0xCF4062, 0xCF40DC, 0xCFB156, 0xDAB19A, 0xDCAB40, 0xDCB156
12387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        };
12397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
12402d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert        @Override
12417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public String getName()
12427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {
12437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return "IBM420_ltr";
12447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
12452d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert        @Override
12467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public CharsetMatch match(CharsetDetector det)
12477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {
12487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int confidence = matchIBM420(det, ngrams, byteMap, (byte)0x40);
12497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return confidence == 0 ? null : new CharsetMatch(det, this, confidence);
12507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
12512d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
12527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
12537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert}
1254