1/* GENERATED SOURCE. DO NOT MODIFY. */ 2// © 2016 and later: Unicode, Inc. and others. 3// License & terms of use: http://www.unicode.org/copyright.html#License 4/** 5******************************************************************************* 6* Copyright (C) 2005-2016, International Business Machines Corporation and * 7* others. All Rights Reserved. * 8******************************************************************************* 9*/ 10package android.icu.text; 11 12import java.io.ByteArrayInputStream; 13import java.io.IOException; 14import java.io.InputStream; 15import java.io.InputStreamReader; 16import java.io.Reader; 17 18 19/** 20 * This class represents a charset that has been identified by a CharsetDetector 21 * as a possible encoding for a set of input data. From an instance of this 22 * class, you can ask for a confidence level in the charset identification, 23 * or for Java Reader or String to access the original byte data in Unicode form. 24 * <p> 25 * Instances of this class are created only by CharsetDetectors. 26 * <p> 27 * Note: this class has a natural ordering that is inconsistent with equals. 28 * The natural ordering is based on the match confidence value. 29 * 30 * @hide Only a subset of ICU is exposed in Android 31 */ 32public class CharsetMatch implements Comparable<CharsetMatch> { 33 34 35 /** 36 * Create a java.io.Reader for reading the Unicode character data corresponding 37 * to the original byte data supplied to the Charset detect operation. 38 * <p> 39 * CAUTION: if the source of the byte data was an InputStream, a Reader 40 * can be created for only one matching char set using this method. If more 41 * than one charset needs to be tried, the caller will need to reset 42 * the InputStream and create InputStreamReaders itself, based on the charset name. 43 * 44 * @return the Reader for the Unicode character data. 45 */ 46 public Reader getReader() { 47 InputStream inputStream = fInputStream; 48 49 if (inputStream == null) { 50 inputStream = new ByteArrayInputStream(fRawInput, 0, fRawLength); 51 } 52 53 try { 54 inputStream.reset(); 55 return new InputStreamReader(inputStream, getName()); 56 } catch (IOException e) { 57 return null; 58 } 59 } 60 61 /** 62 * Create a Java String from Unicode character data corresponding 63 * to the original byte data supplied to the Charset detect operation. 64 * 65 * @return a String created from the converted input data. 66 */ 67 public String getString() throws java.io.IOException { 68 return getString(-1); 69 70 } 71 72 /** 73 * Create a Java String from Unicode character data corresponding 74 * to the original byte data supplied to the Charset detect operation. 75 * The length of the returned string is limited to the specified size; 76 * the string will be trunctated to this length if necessary. A limit value of 77 * zero or less is ignored, and treated as no limit. 78 * 79 * @param maxLength The maximium length of the String to be created when the 80 * source of the data is an input stream, or -1 for 81 * unlimited length. 82 * @return a String created from the converted input data. 83 */ 84 public String getString(int maxLength) throws java.io.IOException { 85 String result = null; 86 if (fInputStream != null) { 87 StringBuilder sb = new StringBuilder(); 88 char[] buffer = new char[1024]; 89 Reader reader = getReader(); 90 int max = maxLength < 0? Integer.MAX_VALUE : maxLength; 91 int bytesRead = 0; 92 93 while ((bytesRead = reader.read(buffer, 0, Math.min(max, 1024))) >= 0) { 94 sb.append(buffer, 0, bytesRead); 95 max -= bytesRead; 96 } 97 98 reader.close(); 99 100 return sb.toString(); 101 } else { 102 String name = getName(); 103 /* 104 * getName() may return a name with a suffix 'rtl' or 'ltr'. This cannot 105 * be used to open a charset (e.g. IBM424_rtl). The ending '_rtl' or 'ltr' 106 * should be stripped off before creating the string. 107 */ 108 int startSuffix = name.indexOf("_rtl") < 0 ? name.indexOf("_ltr") : name.indexOf("_rtl"); 109 if (startSuffix > 0) { 110 name = name.substring(0, startSuffix); 111 } 112 result = new String(fRawInput, name); 113 } 114 return result; 115 116 } 117 118 /** 119 * Get an indication of the confidence in the charset detected. 120 * Confidence values range from 0-100, with larger numbers indicating 121 * a better match of the input data to the characteristics of the 122 * charset. 123 * 124 * @return the confidence in the charset match 125 */ 126 public int getConfidence() { 127 return fConfidence; 128 } 129 130 /** 131 * Get the name of the detected charset. 132 * The name will be one that can be used with other APIs on the 133 * platform that accept charset names. It is the "Canonical name" 134 * as defined by the class java.nio.charset.Charset; for 135 * charsets that are registered with the IANA charset registry, 136 * this is the MIME-preferred registerd name. 137 * 138 * @see java.nio.charset.Charset 139 * @see java.io.InputStreamReader 140 * 141 * @return The name of the charset. 142 */ 143 public String getName() { 144 return fCharsetName; 145 } 146 147 /** 148 * Get the ISO code for the language of the detected charset. 149 * 150 * @return The ISO code for the language or <code>null</code> if the language cannot be determined. 151 */ 152 public String getLanguage() { 153 return fLang; 154 } 155 156 /** 157 * Compare to other CharsetMatch objects. 158 * Comparison is based on the match confidence value, which 159 * allows CharsetDetector.detectAll() to order its results. 160 * 161 * @param other the CharsetMatch object to compare against. 162 * @return a negative integer, zero, or a positive integer as the 163 * confidence level of this CharsetMatch 164 * is less than, equal to, or greater than that of 165 * the argument. 166 * @throws ClassCastException if the argument is not a CharsetMatch. 167 */ 168 @Override 169 public int compareTo (CharsetMatch other) { 170 int compareResult = 0; 171 if (this.fConfidence > other.fConfidence) { 172 compareResult = 1; 173 } else if (this.fConfidence < other.fConfidence) { 174 compareResult = -1; 175 } 176 return compareResult; 177 } 178 179 /* 180 * Constructor. Implementation internal 181 */ 182 CharsetMatch(CharsetDetector det, CharsetRecognizer rec, int conf) { 183 fConfidence = conf; 184 185 // The references to the original application input data must be copied out 186 // of the charset recognizer to here, in case the application resets the 187 // recognizer before using this CharsetMatch. 188 if (det.fInputStream == null) { 189 // We only want the existing input byte data if it came straight from the user, 190 // not if is just the head of a stream. 191 fRawInput = det.fRawInput; 192 fRawLength = det.fRawLength; 193 } 194 fInputStream = det.fInputStream; 195 fCharsetName = rec.getName(); 196 fLang = rec.getLanguage(); 197 } 198 199 /* 200 * Constructor. Implementation internal 201 */ 202 CharsetMatch(CharsetDetector det, CharsetRecognizer rec, int conf, String csName, String lang) { 203 fConfidence = conf; 204 205 // The references to the original application input data must be copied out 206 // of the charset recognizer to here, in case the application resets the 207 // recognizer before using this CharsetMatch. 208 if (det.fInputStream == null) { 209 // We only want the existing input byte data if it came straight from the user, 210 // not if is just the head of a stream. 211 fRawInput = det.fRawInput; 212 fRawLength = det.fRawLength; 213 } 214 fInputStream = det.fInputStream; 215 fCharsetName = csName; 216 fLang = lang; 217 } 218 219 220 // 221 // Private Data 222 // 223 private int fConfidence; 224 private byte[] fRawInput = null; // Original, untouched input bytes. 225 // If user gave us a byte array, this is it. 226 private int fRawLength; // Length of data in fRawInput array. 227 228 private InputStream fInputStream = null; // User's input stream, or null if the user 229 // gave us a byte array. 230 231 private String fCharsetName; // The name of the charset this CharsetMatch 232 // represents. Filled in by the recognizer. 233 private String fLang; // The language, if one was determined by 234 // the recognizer during the detect operation. 235} 236