Charset.java revision 5cd6df2f627e06f9b7f714181d70d3148a3d6c60
1adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project/* 2adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project * Licensed to the Apache Software Foundation (ASF) under one or more 3adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project * contributor license agreements. See the NOTICE file distributed with 4adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project * this work for additional information regarding copyright ownership. 5adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project * The ASF licenses this file to You under the Apache License, Version 2.0 6adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project * (the "License"); you may not use this file except in compliance with 7adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project * the License. You may obtain a copy of the License at 8adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project * 9adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project * http://www.apache.org/licenses/LICENSE-2.0 10adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project * 11adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project * Unless required by applicable law or agreed to in writing, software 12adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project * distributed under the License is distributed on an "AS IS" BASIS, 13adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project * See the License for the specific language governing permissions and 15adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project * limitations under the License. 
16adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project */ 17adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project 18adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Projectpackage java.nio.charset; 19adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project 20ccb8b92211a3e87acaf6486c8d4423c2053b8b5eElliott Hughesimport com.ibm.icu4jni.charset.NativeConverter; 21adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Projectimport java.io.BufferedReader; 22adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Projectimport java.io.IOException; 23adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Projectimport java.io.InputStream; 24adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Projectimport java.io.InputStreamReader; 255cd6df2f627e06f9b7f714181d70d3148a3d6c60Elliott Hughesimport java.io.UnsupportedEncodingException; 26adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Projectimport java.net.URL; 27adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Projectimport java.nio.ByteBuffer; 28adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Projectimport java.nio.CharBuffer; 29adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Projectimport java.nio.charset.spi.CharsetProvider; 30adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Projectimport java.security.AccessController; 31adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Projectimport java.security.PrivilegedAction; 32adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Projectimport java.util.Collections; 33adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Projectimport java.util.Comparator; 34adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Projectimport java.util.Enumeration; 35adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Projectimport java.util.HashMap; 
36adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Projectimport java.util.HashSet; 37adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Projectimport java.util.Iterator; 38adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Projectimport java.util.Locale; 39bcf7c66e617ad0c33bb320184bb2401def517342Elliott Hughesimport java.util.Map; 40cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughesimport java.util.ServiceLoader; 41adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Projectimport java.util.Set; 42adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Projectimport java.util.SortedMap; 43adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Projectimport java.util.TreeMap; 44adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project 45adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project/** 46c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * A charset is a named mapping between Unicode characters and byte sequences. Every 47c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * {@code Charset} can <i>decode</i>, converting a byte sequence into a sequence of characters, 48c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * and some can also <i>encode</i>, converting a sequence of characters into a byte sequence. 49c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * Use the method {@link #canEncode} to find out whether a charset supports both. 50c60bc1815dca549f3fb4e572f6aac749d7fa9fc6Elliott Hughes * 51c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * <h4>Characters</h4> 52c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * <p>In the context of this class, <i>character</i> always refers to a Java character: a Unicode 53c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * code point in the range U+0000 to U+FFFF. (Java represents supplementary characters using surrogates.) 
54c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * Not all byte sequences will represent a character, and not 55c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * all characters can necessarily be represented by a given charset. The method {@link #contains} 56c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * can be used to determine whether every character representable by one charset can also be 57c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * represented by another (meaning that a lossless transformation is possible from the contained 58c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * to the container). 59c60bc1815dca549f3fb4e572f6aac749d7fa9fc6Elliott Hughes * 60c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * <h4>Encodings</h4> 61c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * <p>There are many possible ways to represent Unicode characters as byte sequences. 62c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * See <a href="http://www.unicode.org/reports/tr17/">UTR#17: Unicode Character Encoding Model</a> 63c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * for detailed discussion. 64c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * 65c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * <p>The most important mappings capable of representing every character are the Unicode 66c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * Transformation Format (UTF) charsets. Of those, UTF-8 and the UTF-16 family are the most 67c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * common. UTF-8 (described in <a href="http://www.ietf.org/rfc/rfc3629.txt">RFC 3629</a>) 68c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * encodes a character using 1 to 4 bytes. 
UTF-16 uses exactly 2 bytes per character (potentially 69c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * wasting space, but allowing efficient random access into BMP text), and UTF-32 uses 70c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * exactly 4 bytes per character (trading off even more space for efficient random access into text 71c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * that includes supplementary characters). 72c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * 73c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * <p>UTF-16 and UTF-32 encode characters directly, using their code point as a two- or four-byte 74c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * integer. This means that any given UTF-16 or UTF-32 byte sequence is either big- or 75c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * little-endian. To assist decoders, Unicode includes a special <i>byte order mark</i> (BOM) 76c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * character U+FEFF used to determine the endianness of a sequence. The corresponding byte-swapped 77c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * code point U+FFFE is guaranteed never to be assigned. If a UTF-16 decoder sees 78c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * {@code 0xfe, 0xff}, for example, it knows it's reading a big-endian byte sequence, while 79c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * {@code 0xff, 0xfe}, would indicate a little-endian byte sequence. 80c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * 81c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * <p>UTF-8 can contain a BOM, but since the UTF-8 encoding of a character always uses the same 82c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * byte sequence, there is no information about endianness to convey. 
Seeing the bytes 83c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * corresponding to the UTF-8 encoding of U+FEFF ({@code 0xef, 0xbb, 0xbf}) would only serve to 84c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * suggest that you're reading UTF-8. Note that BOMs are decoded as the U+FEFF character, and 85c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * will appear in the output character sequence. This means that a disadvantage to including a BOM 86c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * in UTF-8 is that most applications that use UTF-8 do not expect to see a BOM. (This is also a 87c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * reason to prefer UTF-8: it's one less complication to worry about.) 88c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * 89c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * <p>Because a BOM indicates how the data that follows should be interpreted, a BOM should occur 90c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * as the first character in a character sequence. 91c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * 92c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * <p>See the <a href="http://unicode.org/faq/utf_bom.html#BOM">Byte Order Mark (BOM) FAQ</a> for 93c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * more about dealing with BOMs. 94c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * 95c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * <h4>Endianness and BOM behavior</h4> 96c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * 97c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * <p>The following tables show the endianness and BOM behavior of the UTF-16 variants. 98c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * 99c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * <p>This table shows what the encoder writes. 
"BE" means that the byte sequence is big-endian, 100c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * "LE" means little-endian. "BE BOM" means a big-endian BOM (that is, {@code 0xfe, 0xff}). 101c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * <p><table width="100%"> 102c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * <tr> <th>Charset</th> <th>Encoder writes</th> </tr> 103c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * <tr> <td>UTF-16BE</td> <td>BE, no BOM</td> </tr> 104c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * <tr> <td>UTF-16LE</td> <td>LE, no BOM</td> </tr> 105c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * <tr> <td>UTF-16</td> <td>BE, with BE BOM</td> </tr> 106c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * </table> 107c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * 108c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * <p>The next table shows how each variant's decoder behaves when reading a byte sequence. 109c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * The exact meaning of "failure" in the table is dependent on the 110c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * {@link CodingErrorAction} supplied to {@link CharsetDecoder#malformedInputAction}, so 111c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * "BE, failure" means "the byte sequence is treated as big-endian, and a little-endian BOM 112c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * triggers the malformedInputAction". 113c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * 114c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * <p>The phrase "includes BOM" means that the output includes the U+FEFF byte order mark character. 
115c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * 116c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * <p><table width="100%"> 117c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * <tr> <th>Charset</th> <th>BE BOM</th> <th>LE BOM</th> <th>No BOM</th> </tr> 118c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * <tr> <td>UTF-16BE</td> <td>BE, includes BOM</td> <td>BE, failure</td> <td>BE</td> </tr> 119c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * <tr> <td>UTF-16LE</td> <td>LE, failure</td> <td>LE, includes BOM</td> <td>LE</td> </tr> 1203784ab1a0eafa37f1181df2815075300ab1c60f6Elliott Hughes * <tr> <td>UTF-16</td> <td>BE</td> <td>LE</td> <td>BE</td> </tr> 121c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * </table> 122c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * 123c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * <h4>Charset names</h4> 124c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * <p>A charset has a canonical name, returned by {@link #name}. Most charsets will 125c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * also have one or more aliases, returned by {@link #aliases}. A charset can be looked up 126c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * by canonical name or any of its aliases using {@link #forName}. 
127c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * 128c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * <h4>Guaranteed-available charsets</h4> 129c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * <p>The following charsets are available on every Java implementation: 130c60bc1815dca549f3fb4e572f6aac749d7fa9fc6Elliott Hughes * <ul> 131c60bc1815dca549f3fb4e572f6aac749d7fa9fc6Elliott Hughes * <li>ISO-8859-1 132c60bc1815dca549f3fb4e572f6aac749d7fa9fc6Elliott Hughes * <li>US-ASCII 133c60bc1815dca549f3fb4e572f6aac749d7fa9fc6Elliott Hughes * <li>UTF-16 134c60bc1815dca549f3fb4e572f6aac749d7fa9fc6Elliott Hughes * <li>UTF-16BE 135c60bc1815dca549f3fb4e572f6aac749d7fa9fc6Elliott Hughes * <li>UTF-16LE 136c60bc1815dca549f3fb4e572f6aac749d7fa9fc6Elliott Hughes * <li>UTF-8 137c60bc1815dca549f3fb4e572f6aac749d7fa9fc6Elliott Hughes * </ul> 138c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * <p>All of these charsets support both decoding and encoding. The charsets whose names begin 139c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * "UTF" can represent all characters, as mentioned above. The "ISO-8859-1" and "US-ASCII" charsets 140c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * can only represent small subsets of these characters. Except when required to do otherwise for 141c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * compatibility, new code should use one of the UTF charsets listed above. The platform's default 142c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * charset is UTF-8. (This is in contrast to some older implementations, where the default charset 143c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * depended on the user's locale.) 144c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * 145c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * <p>Most implementations will support hundreds of charsets. 
Use {@link #availableCharsets} or 146c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * {@link #isSupported} to see what's available. If you intend to use the charset if it's 147c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * available, just call {@link #forName} and catch the exceptions it throws if the charset isn't 148c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * available. 149c60bc1815dca549f3fb4e572f6aac749d7fa9fc6Elliott Hughes * 150c60bc1815dca549f3fb4e572f6aac749d7fa9fc6Elliott Hughes * <p>Additional charsets can be made available by configuring one or more charset 151adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project * providers through provider configuration files. Such files are always named 152adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project * as "java.nio.charset.spi.CharsetProvider" and located in the 153cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes * "META-INF/services" directory of one or more classpaths. The files should be 154adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project * encoded in "UTF-8". Each line of their content specifies the class name of a 155c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * charset provider which extends {@link java.nio.charset.spi.CharsetProvider}. 156cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes * A line should end with '\r', '\n' or '\r\n'. Leading and trailing whitespace 157cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes * is trimmed. Blank lines, and lines (after trimming) starting with "#" which are 158c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * regarded as comments, are both ignored. Duplicates of names already found are also 159adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project * ignored. 
Both the configuration files and the provider classes will be loaded 160adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project * using the thread context class loader. 161eaa2ff09069424b0f7a95c7cd831cef1b744fe67Jesse Wilson * 162c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * <p>Although class is thread-safe, the {@link CharsetDecoder} and {@link CharsetEncoder} instances 163c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * it returns are inherently stateful. 164adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project */ 165adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Projectpublic abstract class Charset implements Comparable<Charset> { 166cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes private static final HashMap<String, Charset> CACHED_CHARSETS = new HashMap<String, Charset>(); 167c903e6720bbbf6540c29f141bd2fa559813ea20aElliott Hughes 168d0628c5cb80e3c1270634c56a784329a4836b9aaElliott Hughes private static final Charset DEFAULT_CHARSET = getDefaultCharset(); 169d0628c5cb80e3c1270634c56a784329a4836b9aaElliott Hughes 170adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project private final String canonicalName; 171adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project 172adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project private final HashSet<String> aliasesSet; 173adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project 174adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project /** 175adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project * Constructs a <code>Charset</code> object. Duplicated aliases are 176adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project * ignored. 
177f33eae7e84eb6d3b0f4e86b59605bb3de73009f3Elliott Hughes * 178adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project * @param canonicalName 179adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project * the canonical name of the charset. 180adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project * @param aliases 181adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project * an array containing all aliases of the charset. May be null. 182adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project * @throws IllegalCharsetNameException 183adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project * on an illegal value being supplied for either 184adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project * <code>canonicalName</code> or for any element of 185adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project * <code>aliases</code>. 186adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project */ 187eaa2ff09069424b0f7a95c7cd831cef1b744fe67Jesse Wilson protected Charset(String canonicalName, String[] aliases) { 188adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project // check whether the given canonical name is legal 189adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project checkCharsetName(canonicalName); 190adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project this.canonicalName = canonicalName; 191adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project // check each alias and put into a set 192adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project this.aliasesSet = new HashSet<String>(); 193bcf7c66e617ad0c33bb320184bb2401def517342Elliott Hughes if (aliases != null) { 194cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes for (String alias : aliases) { 195cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes checkCharsetName(alias); 
196cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes this.aliasesSet.add(alias); 197adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project } 198adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project } 199adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project } 200adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project 201adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project private static void checkCharsetName(String name) { 202c903e6720bbbf6540c29f141bd2fa559813ea20aElliott Hughes if (name.isEmpty()) { 203adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project throw new IllegalCharsetNameException(name); 204adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project } 205adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project int length = name.length(); 206cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes for (int i = 0; i < length; ++i) { 207cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes if (!isValidCharsetNameCharacter(name.charAt(i))) { 208adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project throw new IllegalCharsetNameException(name); 209adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project } 210adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project } 211adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project } 212adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project 213cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes private static boolean isValidCharsetNameCharacter(char c) { 214cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || 215cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes c == '-' || c == '.' 
|| c == ':' || c == '_'; 216ccb8b92211a3e87acaf6486c8d4423c2053b8b5eElliott Hughes } 217ccb8b92211a3e87acaf6486c8d4423c2053b8b5eElliott Hughes 218adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project /** 219ccb8b92211a3e87acaf6486c8d4423c2053b8b5eElliott Hughes * Returns an immutable case-insensitive map from canonical names to {@code Charset} instances. 220ccb8b92211a3e87acaf6486c8d4423c2053b8b5eElliott Hughes * If multiple charsets have the same canonical name, it is unspecified which is returned in 221ccb8b92211a3e87acaf6486c8d4423c2053b8b5eElliott Hughes * the map. This method may be slow. If you know which charset you're looking for, use 222ccb8b92211a3e87acaf6486c8d4423c2053b8b5eElliott Hughes * {@link #forName}. 223ccb8b92211a3e87acaf6486c8d4423c2053b8b5eElliott Hughes * @return an immutable case-insensitive map from canonical names to {@code Charset} instances 224adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project */ 225adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project public static SortedMap<String, Charset> availableCharsets() { 226ccb8b92211a3e87acaf6486c8d4423c2053b8b5eElliott Hughes // Start with a copy of the built-in charsets... 227ccb8b92211a3e87acaf6486c8d4423c2053b8b5eElliott Hughes TreeMap<String, Charset> charsets = new TreeMap<String, Charset>(String.CASE_INSENSITIVE_ORDER); 228cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes for (String charsetName : NativeConverter.getAvailableCharsetNames()) { 229cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes Charset charset = NativeConverter.charsetForName(charsetName); 230cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes charsets.put(charset.name(), charset); 231adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project } 232adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project 233cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes // Add all charsets provided by all charset providers... 
234cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes for (CharsetProvider charsetProvider : ServiceLoader.load(CharsetProvider.class, null)) { 235cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes Iterator<Charset> it = charsetProvider.charsets(); 236cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes while (it.hasNext()) { 237cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes Charset cs = it.next(); 238cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes // A CharsetProvider can't override a built-in Charset. 239cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes if (!charsets.containsKey(cs.name())) { 240cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes charsets.put(cs.name(), cs); 241adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project } 242adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project } 243adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project } 244adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project 245cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes return Collections.unmodifiableSortedMap(charsets); 246adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project } 247adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project 248cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes private static Charset cacheCharset(String charsetName, Charset cs) { 249cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes synchronized (CACHED_CHARSETS) { 250cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes // Get the canonical name for this charset, and the canonical instance from the table. 
251cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes String canonicalName = cs.name(); 252cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes Charset canonicalCharset = CACHED_CHARSETS.get(canonicalName); 253cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes if (canonicalCharset == null) { 254cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes canonicalCharset = cs; 255cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes } 256bcf7c66e617ad0c33bb320184bb2401def517342Elliott Hughes 257cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes // Cache the charset by its canonical name... 258cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes CACHED_CHARSETS.put(canonicalName, canonicalCharset); 259c903e6720bbbf6540c29f141bd2fa559813ea20aElliott Hughes 260cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes // And the name the user used... (Section 1.4 of http://unicode.org/reports/tr22/ means 261cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes // that many non-alias, non-canonical names are valid. For example, "utf8" isn't an 262cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes // alias of the canonical name "UTF-8", but we shouldn't penalize consistent users of 263cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes // such names unduly.) 264cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes CACHED_CHARSETS.put(charsetName, canonicalCharset); 265c903e6720bbbf6540c29f141bd2fa559813ea20aElliott Hughes 266cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes // And all its aliases... 
for (String alias : cs.aliasesSet) {
CACHED_CHARSETS.put(alias, canonicalCharset);
}

return canonicalCharset;
}
}

/**
 * Returns a {@code Charset} instance for the named charset.
 *
 * <p>The lookup tries three sources in order and stops at the first hit: the cache of
 * charsets resolved earlier, the built-in charsets offered by {@code NativeConverter}, and
 * finally each {@code CharsetProvider} discovered through {@code ServiceLoader}. A charset
 * found by either of the last two sources is handed to {@code cacheCharset} and the result
 * of that call is returned.
 *
 * @param charsetName a charset name (either canonical or an alias)
 * @return a charset for {@code charsetName}
 * @throws IllegalCharsetNameException
 *             if the specified charset name is {@code null} or illegal.
 * @throws UnsupportedCharsetException
 *             if the desired charset is not supported by this runtime.
 */
public static Charset forName(String charsetName) {
    // Is this charset in our cache?
    // The lock covers only the cache probe; the slower lookups below run without it.
    Charset cs;
    synchronized (CACHED_CHARSETS) {
        cs = CACHED_CHARSETS.get(charsetName);
        if (cs != null) {
            return cs;
        }
    }

    // Is this a built-in charset supported by ICU?
    // A null name is rejected here, after the cache probe and before any name validation.
    if (charsetName == null) {
        throw new IllegalCharsetNameException(charsetName);
    }
    checkCharsetName(charsetName);
    cs = NativeConverter.charsetForName(charsetName);
    if (cs != null) {
        return cacheCharset(charsetName, cs);
    }

    // Does a configured CharsetProvider have this charset?
    for (CharsetProvider charsetProvider : ServiceLoader.load(CharsetProvider.class, null)) {
        cs = charsetProvider.charsetForName(charsetName);
        if (cs != null) {
            return cacheCharset(charsetName, cs);
        }
    }

    // Neither the cache, ICU, nor any provider knows this name.
    throw new UnsupportedCharsetException(charsetName);
}
3185cd6df2f627e06f9b7f714181d70d3148a3d6c60Elliott Hughes * 3195cd6df2f627e06f9b7f714181d70d3148a3d6c60Elliott Hughes * @hide 3205cd6df2f627e06f9b7f714181d70d3148a3d6c60Elliott Hughes */ 3215cd6df2f627e06f9b7f714181d70d3148a3d6c60Elliott Hughes public static Charset forNameUEE(String charsetName) throws UnsupportedEncodingException { 3225cd6df2f627e06f9b7f714181d70d3148a3d6c60Elliott Hughes try { 3235cd6df2f627e06f9b7f714181d70d3148a3d6c60Elliott Hughes return Charset.forName(charsetName); 3245cd6df2f627e06f9b7f714181d70d3148a3d6c60Elliott Hughes } catch (Exception cause) { 3255cd6df2f627e06f9b7f714181d70d3148a3d6c60Elliott Hughes UnsupportedEncodingException ex = new UnsupportedEncodingException(charsetName); 3265cd6df2f627e06f9b7f714181d70d3148a3d6c60Elliott Hughes ex.initCause(cause); 3275cd6df2f627e06f9b7f714181d70d3148a3d6c60Elliott Hughes throw ex; 3285cd6df2f627e06f9b7f714181d70d3148a3d6c60Elliott Hughes } 3295cd6df2f627e06f9b7f714181d70d3148a3d6c60Elliott Hughes } 3305cd6df2f627e06f9b7f714181d70d3148a3d6c60Elliott Hughes 3315cd6df2f627e06f9b7f714181d70d3148a3d6c60Elliott Hughes /** 332adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project * Determines whether the specified charset is supported by this runtime. 333f33eae7e84eb6d3b0f4e86b59605bb3de73009f3Elliott Hughes * 334adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project * @param charsetName 335adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project * the name of the charset. 336adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project * @return true if the specified charset is supported, otherwise false. 337adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project * @throws IllegalCharsetNameException 338adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project * if the specified charset name is illegal. 
339adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project */ 340c903e6720bbbf6540c29f141bd2fa559813ea20aElliott Hughes public static boolean isSupported(String charsetName) { 341c903e6720bbbf6540c29f141bd2fa559813ea20aElliott Hughes try { 342c903e6720bbbf6540c29f141bd2fa559813ea20aElliott Hughes Charset cs = forName(charsetName); 343c903e6720bbbf6540c29f141bd2fa559813ea20aElliott Hughes return true; 344c903e6720bbbf6540c29f141bd2fa559813ea20aElliott Hughes } catch (UnsupportedCharsetException ex) { 345c903e6720bbbf6540c29f141bd2fa559813ea20aElliott Hughes return false; 346c903e6720bbbf6540c29f141bd2fa559813ea20aElliott Hughes } 347adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project } 348adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project 349adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project /** 350c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * Determines whether this charset is a superset of the given charset. A charset C1 contains 351c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * charset C2 if every character representable by C2 is also representable by C1. This means 352c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * that lossless conversion is possible from C2 to C1 (but not necessarily the other way 353c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * round). It does <i>not</i> imply that the two charsets use the same byte sequences for the 354c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * characters they share. 355c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * 356c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * <p>Note that this method is allowed to be conservative, and some implementations may return 357c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * false when this charset does contain the other charset. 
/**
 * Determines whether this charset is a superset of the given charset. A charset C1 contains
 * charset C2 if every character representable by C2 is also representable by C1. This means
 * that lossless conversion is possible from C2 to C1 (but not necessarily the other way
 * round). It does <i>not</i> imply that the two charsets use the same byte sequences for the
 * characters they share.
 *
 * <p>Note that this method is allowed to be conservative, and some implementations may return
 * false when this charset does contain the other charset. Android's implementation is precise,
 * and will always return true in such cases.
 *
 * @param charset
 *            a given charset.
 * @return true if this charset is a superset of the given charset,
 *         false if it's unknown or this charset is not a superset of
 *         the given charset.
 */
public abstract boolean contains(Charset charset);

/**
 * Gets a new instance of an encoder for this charset.
 *
 * @return a new instance of an encoder for this charset.
 */
public abstract CharsetEncoder newEncoder();
377f33eae7e84eb6d3b0f4e86b59605bb3de73009f3Elliott Hughes * 378adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project * @return a new instance of a decoder for this charset. 379adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project */ 380adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project public abstract CharsetDecoder newDecoder(); 381adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project 382adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project /** 383adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project * Gets the canonical name of this charset. 384f33eae7e84eb6d3b0f4e86b59605bb3de73009f3Elliott Hughes * 385adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project * @return this charset's name in canonical form. 386adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project */ 387adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project public final String name() { 388adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project return this.canonicalName; 389adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project } 390adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project 391adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project /** 392adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project * Gets the set of this charset's aliases. 393f33eae7e84eb6d3b0f4e86b59605bb3de73009f3Elliott Hughes * 394adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project * @return an unmodifiable set of this charset's aliases. 
395adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project */ 396adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project public final Set<String> aliases() { 397adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project return Collections.unmodifiableSet(this.aliasesSet); 398adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project } 399adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project 400adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project /** 401adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project * Gets the name of this charset for the default locale. 402f33eae7e84eb6d3b0f4e86b59605bb3de73009f3Elliott Hughes * 403eaa2ff09069424b0f7a95c7cd831cef1b744fe67Jesse Wilson * <p>The default implementation returns the canonical name of this charset. 404eaa2ff09069424b0f7a95c7cd831cef1b744fe67Jesse Wilson * Subclasses may return a localized display name. 405eaa2ff09069424b0f7a95c7cd831cef1b744fe67Jesse Wilson * 406adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project * @return the name of this charset for the default locale. 407adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project */ 408adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project public String displayName() { 409adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project return this.canonicalName; 410adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project } 411adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project 412adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project /** 413adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project * Gets the name of this charset for the specified locale. 
414eaa2ff09069424b0f7a95c7cd831cef1b744fe67Jesse Wilson * 415eaa2ff09069424b0f7a95c7cd831cef1b744fe67Jesse Wilson * <p>The default implementation returns the canonical name of this charset. 416eaa2ff09069424b0f7a95c7cd831cef1b744fe67Jesse Wilson * Subclasses may return a localized display name. 417eaa2ff09069424b0f7a95c7cd831cef1b744fe67Jesse Wilson * 418adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project * @param l 419adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project * a certain locale 420eaa2ff09069424b0f7a95c7cd831cef1b744fe67Jesse Wilson * @return the name of this charset for the specified locale 421adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project */ 422adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project public String displayName(Locale l) { 423adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project return this.canonicalName; 424adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project } 425adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project 426adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project /** 427adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project * Indicates whether this charset is known to be registered in the IANA 428adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project * Charset Registry. 429f33eae7e84eb6d3b0f4e86b59605bb3de73009f3Elliott Hughes * 430adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project * @return true if the charset is known to be registered, otherwise returns 431adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project * false. 
432adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project */ 433adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project public final boolean isRegistered() { 434cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes return !canonicalName.startsWith("x-") && !canonicalName.startsWith("X-"); 435adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project } 436adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project 437adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project /** 438adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project * Returns true if this charset supports encoding, false otherwise. 439f33eae7e84eb6d3b0f4e86b59605bb3de73009f3Elliott Hughes * 440adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project * @return true if this charset supports encoding, false otherwise. 441adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project */ 442adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project public boolean canEncode() { 443adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project return true; 444adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project } 445adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project 446adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project /** 447c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * Returns a new {@code ByteBuffer} containing the bytes encoding the characters from 448c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * {@code buffer}. 449c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * This method uses {@code CodingErrorAction.REPLACE}. 
450c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * 451c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * <p>Applications should generally create a {@link CharsetEncoder} using {@link #newEncoder} 452c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * for performance. 453eaa2ff09069424b0f7a95c7cd831cef1b744fe67Jesse Wilson * 454adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project * @param buffer 455adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project * the character buffer containing the content to be encoded. 456adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project * @return the result of the encoding. 457adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project */ 458eaa2ff09069424b0f7a95c7cd831cef1b744fe67Jesse Wilson public final ByteBuffer encode(CharBuffer buffer) { 459adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project try { 460c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes return newEncoder() 461eaa2ff09069424b0f7a95c7cd831cef1b744fe67Jesse Wilson .onMalformedInput(CodingErrorAction.REPLACE) 462eaa2ff09069424b0f7a95c7cd831cef1b744fe67Jesse Wilson .onUnmappableCharacter(CodingErrorAction.REPLACE).encode( 463eaa2ff09069424b0f7a95c7cd831cef1b744fe67Jesse Wilson buffer); 464adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project } catch (CharacterCodingException ex) { 465adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project throw new Error(ex.getMessage(), ex); 466adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project } 467adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project } 468adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project 469adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project /** 470c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * Returns a new {@code ByteBuffer} containing the bytes encoding the characters from {@code 
s}. 471c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * This method uses {@code CodingErrorAction.REPLACE}. 472eaa2ff09069424b0f7a95c7cd831cef1b744fe67Jesse Wilson * 473c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * <p>Applications should generally create a {@link CharsetEncoder} using {@link #newEncoder} 474c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * for performance. 475c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * 476c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * @param s the string to be encoded. 477adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project * @return the result of the encoding. 478adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project */ 479adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project public final ByteBuffer encode(String s) { 480adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project return encode(CharBuffer.wrap(s)); 481adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project } 482adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project 483adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project /** 484c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * Returns a new {@code CharBuffer} containing the characters decoded from {@code buffer}. 485c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * This method uses {@code CodingErrorAction.REPLACE}. 486c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * 487c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * <p>Applications should generally create a {@link CharsetDecoder} using {@link #newDecoder} 488c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * for performance. 
489f33eae7e84eb6d3b0f4e86b59605bb3de73009f3Elliott Hughes * 490adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project * @param buffer 491adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project * the byte buffer containing the content to be decoded. 492adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project * @return a character buffer containing the output of the decoding. 493adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project */ 494adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project public final CharBuffer decode(ByteBuffer buffer) { 495adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project try { 496c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes return newDecoder() 497eaa2ff09069424b0f7a95c7cd831cef1b744fe67Jesse Wilson .onMalformedInput(CodingErrorAction.REPLACE) 498c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes .onUnmappableCharacter(CodingErrorAction.REPLACE).decode(buffer); 499adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project } catch (CharacterCodingException ex) { 500adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project throw new Error(ex.getMessage(), ex); 501adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project } 502adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project } 503adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project 504adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project /* 505adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project * ------------------------------------------------------------------- 506adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project * Methods implementing parent interface Comparable 507adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project * ------------------------------------------------------------------- 
508adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project */ 509adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project 510adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project /** 511c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * Compares this charset with the given charset. This comparison is 512adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project * based on the case insensitive canonical names of the charsets. 513f33eae7e84eb6d3b0f4e86b59605bb3de73009f3Elliott Hughes * 514adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project * @param charset 515adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project * the given object to be compared with. 516adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project * @return a negative integer if less than the given object, a positive 517adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project * integer if larger than it, or 0 if equal to it. 
518adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project */ 519adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project public final int compareTo(Charset charset) { 520adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project return this.canonicalName.compareToIgnoreCase(charset.canonicalName); 521adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project } 522adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project 523adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project /* 524adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project * ------------------------------------------------------------------- 525adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project * Methods overriding parent class Object 526adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project * ------------------------------------------------------------------- 527adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project */ 528adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project 529adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project /** 530adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project * Determines whether this charset equals to the given object. They are 531adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project * considered to be equal if they have the same canonical name. 532f33eae7e84eb6d3b0f4e86b59605bb3de73009f3Elliott Hughes * 533adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project * @param obj 534adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project * the given object to be compared with. 535adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project * @return true if they have the same canonical name, otherwise false. 
536adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project */ 537adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project @Override 538adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project public final boolean equals(Object obj) { 539adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project if (obj instanceof Charset) { 540adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project Charset that = (Charset) obj; 541adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project return this.canonicalName.equals(that.canonicalName); 542adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project } 543adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project return false; 544adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project } 545adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project 546adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project /** 547adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project * Gets the hash code of this charset. 548f33eae7e84eb6d3b0f4e86b59605bb3de73009f3Elliott Hughes * 549adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project * @return the hash code of this charset. 
550adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project */ 551adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project @Override 552adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project public final int hashCode() { 553adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project return this.canonicalName.hashCode(); 554adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project } 555adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project 556adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project /** 557adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project * Gets a string representation of this charset. Usually this contains the 558adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project * canonical name of the charset. 559f33eae7e84eb6d3b0f4e86b59605bb3de73009f3Elliott Hughes * 560adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project * @return a string representation of this charset. 561adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project */ 562adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project @Override 563adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project public final String toString() { 564cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes return getClass().getName() + "[" + this.canonicalName + "]"; 565adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project } 566adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project 567adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project /** 568c903e6720bbbf6540c29f141bd2fa559813ea20aElliott Hughes * Returns the system's default charset. This is determined during VM startup, and will not 569c903e6720bbbf6540c29f141bd2fa559813ea20aElliott Hughes * change thereafter. On Android, the default charset is UTF-8. 
/**
 * Returns the system's default charset. This is determined during VM startup, and will not
 * change thereafter. On Android, the default charset is UTF-8.
 *
 * @return the default charset.
 */
public static Charset defaultCharset() {
    return DEFAULT_CHARSET;
}

/**
 * Resolves the charset named by the {@code file.encoding} system property, which is read
 * inside a privileged action and defaults to {@code "UTF-8"} when unset. If that charset is
 * not supported, UTF-8 is used instead.
 *
 * @return the charset for {@code file.encoding}, or UTF-8 as the fallback.
 */
private static Charset getDefaultCharset() {
    PrivilegedAction<String> readFileEncoding = new PrivilegedAction<String>() {
        public String run() {
            return System.getProperty("file.encoding", "UTF-8");
        }
    };
    String configuredName = AccessController.doPrivileged(readFileEncoding);
    try {
        return forName(configuredName);
    } catch (UnsupportedCharsetException unsupported) {
        // The configured encoding is unknown to this runtime; fall back to UTF-8.
        return forName("UTF-8");
    }
}
}