1/* GENERATED SOURCE. DO NOT MODIFY. */
2// © 2016 and later: Unicode, Inc. and others.
3// License & terms of use: http://www.unicode.org/copyright.html#License
4/**
5*******************************************************************************
6* Copyright (C) 2005-2016, International Business Machines Corporation and    *
7* others. All Rights Reserved.                                                *
8*******************************************************************************
9*/
10package android.icu.text;
11
12import java.io.ByteArrayInputStream;
13import java.io.IOException;
14import java.io.InputStream;
15import java.io.InputStreamReader;
16import java.io.Reader;
17
18
19/**
20 * This class represents a charset that has been identified by a CharsetDetector
21 * as a possible encoding for a set of input data.  From an instance of this
22 * class, you can ask for a confidence level in the charset identification,
23 * or for Java Reader or String to access the original byte data in Unicode form.
24 * <p>
25 * Instances of this class are created only by CharsetDetectors.
26 * <p>
27 * Note:  this class has a natural ordering that is inconsistent with equals.
28 *        The natural ordering is based on the match confidence value.
29 *
30 * @hide Only a subset of ICU is exposed in Android
31 */
32public class CharsetMatch implements Comparable<CharsetMatch> {
33
34
35    /**
36     * Create a java.io.Reader for reading the Unicode character data corresponding
37     * to the original byte data supplied to the Charset detect operation.
38     * <p>
39     * CAUTION:  if the source of the byte data was an InputStream, a Reader
40     * can be created for only one matching char set using this method.  If more
41     * than one charset needs to be tried, the caller will need to reset
42     * the InputStream and create InputStreamReaders itself, based on the charset name.
43     *
44     * @return the Reader for the Unicode character data.
45     */
46    public Reader getReader() {
47        InputStream inputStream = fInputStream;
48
49        if (inputStream == null) {
50            inputStream = new ByteArrayInputStream(fRawInput, 0, fRawLength);
51        }
52
53        try {
54            inputStream.reset();
55            return new InputStreamReader(inputStream, getName());
56        } catch (IOException e) {
57            return null;
58        }
59    }
60
61    /**
62     * Create a Java String from Unicode character data corresponding
63     * to the original byte data supplied to the Charset detect operation.
64     *
65     * @return a String created from the converted input data.
66     */
67    public String getString()  throws java.io.IOException {
68        return getString(-1);
69
70    }
71
72    /**
73     * Create a Java String from Unicode character data corresponding
74     * to the original byte data supplied to the Charset detect operation.
75     * The length of the returned string is limited to the specified size;
76     * the string will be trunctated to this length if necessary.  A limit value of
77     * zero or less is ignored, and treated as no limit.
78     *
79     * @param maxLength The maximium length of the String to be created when the
80     *                  source of the data is an input stream, or -1 for
81     *                  unlimited length.
82     * @return a String created from the converted input data.
83     */
84    public String getString(int maxLength) throws java.io.IOException {
85        String result = null;
86        if (fInputStream != null) {
87            StringBuilder sb = new StringBuilder();
88            char[] buffer = new char[1024];
89            Reader reader = getReader();
90            int max = maxLength < 0? Integer.MAX_VALUE : maxLength;
91            int bytesRead = 0;
92
93            while ((bytesRead = reader.read(buffer, 0, Math.min(max, 1024))) >= 0) {
94                sb.append(buffer, 0, bytesRead);
95                max -= bytesRead;
96            }
97
98            reader.close();
99
100            return sb.toString();
101        } else {
102            String name = getName();
103            /*
104             * getName() may return a name with a suffix 'rtl' or 'ltr'. This cannot
105             * be used to open a charset (e.g. IBM424_rtl). The ending '_rtl' or 'ltr'
106             * should be stripped off before creating the string.
107             */
108            int startSuffix = name.indexOf("_rtl") < 0 ? name.indexOf("_ltr") : name.indexOf("_rtl");
109            if (startSuffix > 0) {
110                name = name.substring(0, startSuffix);
111            }
112            result = new String(fRawInput, name);
113        }
114        return result;
115
116    }
117
118    /**
119     * Get an indication of the confidence in the charset detected.
120     * Confidence values range from 0-100, with larger numbers indicating
121     * a better match of the input data to the characteristics of the
122     * charset.
123     *
124     * @return the confidence in the charset match
125     */
126    public int getConfidence() {
127        return fConfidence;
128    }
129
130    /**
131     * Get the name of the detected charset.
132     * The name will be one that can be used with other APIs on the
133     * platform that accept charset names.  It is the "Canonical name"
134     * as defined by the class java.nio.charset.Charset; for
135     * charsets that are registered with the IANA charset registry,
136     * this is the MIME-preferred registerd name.
137     *
138     * @see java.nio.charset.Charset
139     * @see java.io.InputStreamReader
140     *
141     * @return The name of the charset.
142     */
143    public String getName() {
144        return fCharsetName;
145    }
146
147    /**
148     * Get the ISO code for the language of the detected charset.
149     *
150     * @return The ISO code for the language or <code>null</code> if the language cannot be determined.
151     */
152    public String getLanguage() {
153        return fLang;
154    }
155
156    /**
157     * Compare to other CharsetMatch objects.
158     * Comparison is based on the match confidence value, which
159     *   allows CharsetDetector.detectAll() to order its results.
160     *
161     * @param other the CharsetMatch object to compare against.
162     * @return  a negative integer, zero, or a positive integer as the
163     *          confidence level of this CharsetMatch
164     *          is less than, equal to, or greater than that of
165     *          the argument.
166     * @throws ClassCastException if the argument is not a CharsetMatch.
167     */
168    @Override
169    public int compareTo (CharsetMatch other) {
170        int compareResult = 0;
171        if (this.fConfidence > other.fConfidence) {
172            compareResult = 1;
173        } else if (this.fConfidence < other.fConfidence) {
174            compareResult = -1;
175        }
176        return compareResult;
177    }
178
179    /*
180     *  Constructor.  Implementation internal
181     */
182    CharsetMatch(CharsetDetector det, CharsetRecognizer rec, int conf) {
183        fConfidence = conf;
184
185        // The references to the original application input data must be copied out
186        //   of the charset recognizer to here, in case the application resets the
187        //   recognizer before using this CharsetMatch.
188        if (det.fInputStream == null) {
189            // We only want the existing input byte data if it came straight from the user,
190            //   not if is just the head of a stream.
191            fRawInput    = det.fRawInput;
192            fRawLength   = det.fRawLength;
193        }
194        fInputStream = det.fInputStream;
195        fCharsetName = rec.getName();
196        fLang = rec.getLanguage();
197    }
198
199    /*
200     *  Constructor.  Implementation internal
201     */
202    CharsetMatch(CharsetDetector det, CharsetRecognizer rec, int conf, String csName, String lang) {
203        fConfidence = conf;
204
205        // The references to the original application input data must be copied out
206        //   of the charset recognizer to here, in case the application resets the
207        //   recognizer before using this CharsetMatch.
208        if (det.fInputStream == null) {
209            // We only want the existing input byte data if it came straight from the user,
210            //   not if is just the head of a stream.
211            fRawInput    = det.fRawInput;
212            fRawLength   = det.fRawLength;
213        }
214        fInputStream = det.fInputStream;
215        fCharsetName = csName;
216        fLang = lang;
217    }
218
219
220    //
221    //   Private Data
222    //
223    private int                 fConfidence;
224    private byte[]              fRawInput = null;     // Original, untouched input bytes.
225                                                      //  If user gave us a byte array, this is it.
226    private int                 fRawLength;           // Length of data in fRawInput array.
227
228    private InputStream         fInputStream = null;  // User's input stream, or null if the user
229                                                      //   gave us a byte array.
230
231    private String              fCharsetName;         // The name of the charset this CharsetMatch
232                                                      //   represents.  Filled in by the recognizer.
233    private String              fLang;                // The language, if one was determined by
234                                                      //   the recognizer during the detect operation.
235}
236