Charset.java revision 5cd6df2f627e06f9b7f714181d70d3148a3d6c60
1adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project/*
2adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project *  Licensed to the Apache Software Foundation (ASF) under one or more
3adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project *  contributor license agreements.  See the NOTICE file distributed with
4adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project *  this work for additional information regarding copyright ownership.
5adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project *  The ASF licenses this file to You under the Apache License, Version 2.0
6adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project *  (the "License"); you may not use this file except in compliance with
7adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project *  the License.  You may obtain a copy of the License at
8adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project *
9adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project *     http://www.apache.org/licenses/LICENSE-2.0
10adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project *
11adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project *  Unless required by applicable law or agreed to in writing, software
12adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project *  distributed under the License is distributed on an "AS IS" BASIS,
13adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project *  See the License for the specific language governing permissions and
15adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project *  limitations under the License.
16adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project */
17adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project
18adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Projectpackage java.nio.charset;
19adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project
20ccb8b92211a3e87acaf6486c8d4423c2053b8b5eElliott Hughesimport com.ibm.icu4jni.charset.NativeConverter;
21adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Projectimport java.io.BufferedReader;
22adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Projectimport java.io.IOException;
23adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Projectimport java.io.InputStream;
24adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Projectimport java.io.InputStreamReader;
255cd6df2f627e06f9b7f714181d70d3148a3d6c60Elliott Hughesimport java.io.UnsupportedEncodingException;
26adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Projectimport java.net.URL;
27adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Projectimport java.nio.ByteBuffer;
28adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Projectimport java.nio.CharBuffer;
29adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Projectimport java.nio.charset.spi.CharsetProvider;
30adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Projectimport java.security.AccessController;
31adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Projectimport java.security.PrivilegedAction;
32adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Projectimport java.util.Collections;
33adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Projectimport java.util.Comparator;
34adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Projectimport java.util.Enumeration;
35adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Projectimport java.util.HashMap;
36adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Projectimport java.util.HashSet;
37adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Projectimport java.util.Iterator;
38adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Projectimport java.util.Locale;
39bcf7c66e617ad0c33bb320184bb2401def517342Elliott Hughesimport java.util.Map;
40cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughesimport java.util.ServiceLoader;
41adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Projectimport java.util.Set;
42adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Projectimport java.util.SortedMap;
43adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Projectimport java.util.TreeMap;
44adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project
45adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project/**
46c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * A charset is a named mapping between Unicode characters and byte sequences. Every
47c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * {@code Charset} can <i>decode</i>, converting a byte sequence into a sequence of characters,
48c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * and some can also <i>encode</i>, converting a sequence of characters into a byte sequence.
49c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * Use the method {@link #canEncode} to find out whether a charset supports both.
50c60bc1815dca549f3fb4e572f6aac749d7fa9fc6Elliott Hughes *
51c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * <h4>Characters</h4>
52c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * <p>In the context of this class, <i>character</i> always refers to a Java character: a Unicode
53c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * code point in the range U+0000 to U+FFFF. (Java represents supplementary characters using surrogates.)
54c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * Not all byte sequences will represent a character, and not
55c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * all characters can necessarily be represented by a given charset. The method {@link #contains}
56c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * can be used to determine whether every character representable by one charset can also be
57c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * represented by another (meaning that a lossless transformation is possible from the contained
58c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * to the container).
59c60bc1815dca549f3fb4e572f6aac749d7fa9fc6Elliott Hughes *
60c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * <h4>Encodings</h4>
61c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * <p>There are many possible ways to represent Unicode characters as byte sequences.
62c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * See <a href="http://www.unicode.org/reports/tr17/">UTR#17: Unicode Character Encoding Model</a>
63c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * for detailed discussion.
64c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes *
65c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * <p>The most important mappings capable of representing every character are the Unicode
66c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * Transformation Format (UTF) charsets. Of those, UTF-8 and the UTF-16 family are the most
67c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * common. UTF-8 (described in <a href="http://www.ietf.org/rfc/rfc3629.txt">RFC 3629</a>)
68c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * encodes a character using 1 to 4 bytes. UTF-16 uses exactly 2 bytes per character (potentially
69c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * wasting space, but allowing efficient random access into BMP text), and UTF-32 uses
70c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * exactly 4 bytes per character (trading off even more space for efficient random access into text
71c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * that includes supplementary characters).
72c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes *
73c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * <p>UTF-16 and UTF-32 encode characters directly, using their code point as a two- or four-byte
74c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * integer. This means that any given UTF-16 or UTF-32 byte sequence is either big- or
75c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * little-endian. To assist decoders, Unicode includes a special <i>byte order mark</i> (BOM)
76c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * character U+FEFF used to determine the endianness of a sequence. The corresponding byte-swapped
77c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * code point U+FFFE is guaranteed never to be assigned. If a UTF-16 decoder sees
78c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * {@code 0xfe, 0xff}, for example, it knows it's reading a big-endian byte sequence, while
79c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * {@code 0xff, 0xfe}, would indicate a little-endian byte sequence.
80c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes *
81c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * <p>UTF-8 can contain a BOM, but since the UTF-8 encoding of a character always uses the same
82c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * byte sequence, there is no information about endianness to convey. Seeing the bytes
83c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * corresponding to the UTF-8 encoding of U+FEFF ({@code 0xef, 0xbb, 0xbf}) would only serve to
84c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * suggest that you're reading UTF-8. Note that BOMs are decoded as the U+FEFF character, and
85c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * will appear in the output character sequence. This means that a disadvantage to including a BOM
86c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * in UTF-8 is that most applications that use UTF-8 do not expect to see a BOM. (This is also a
87c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * reason to prefer UTF-8: it's one less complication to worry about.)
88c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes *
89c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * <p>Because a BOM indicates how the data that follows should be interpreted, a BOM should occur
90c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * as the first character in a character sequence.
91c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes *
92c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * <p>See the <a href="http://unicode.org/faq/utf_bom.html#BOM">Byte Order Mark (BOM) FAQ</a> for
93c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * more about dealing with BOMs.
94c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes *
95c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * <h4>Endianness and BOM behavior</h4>
96c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes *
97c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * <p>The following tables show the endianness and BOM behavior of the UTF-16 variants.
98c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes *
99c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * <p>This table shows what the encoder writes. "BE" means that the byte sequence is big-endian,
100c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * "LE" means little-endian. "BE BOM" means a big-endian BOM (that is, {@code 0xfe, 0xff}).
101c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * <p><table width="100%">
102c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * <tr> <th>Charset</th>  <th>Encoder writes</th>  </tr>
103c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * <tr> <td>UTF-16BE</td> <td>BE, no BOM</td>      </tr>
104c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * <tr> <td>UTF-16LE</td> <td>LE, no BOM</td>      </tr>
105c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * <tr> <td>UTF-16</td>   <td>BE, with BE BOM</td> </tr>
106c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * </table>
107c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes *
108c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * <p>The next table shows how each variant's decoder behaves when reading a byte sequence.
109c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * The exact meaning of "failure" in the table is dependent on the
110c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * {@link CodingErrorAction} supplied to {@link CharsetDecoder#malformedInputAction}, so
111c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * "BE, failure" means "the byte sequence is treated as big-endian, and a little-endian BOM
112c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * triggers the malformedInputAction".
113c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes *
114c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * <p>The phrase "includes BOM" means that the output includes the U+FEFF byte order mark character.
115c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes *
116c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * <p><table width="100%">
117c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * <tr> <th>Charset</th>  <th>BE BOM</th>           <th>LE BOM</th>           <th>No BOM</th> </tr>
118c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * <tr> <td>UTF-16BE</td> <td>BE, includes BOM</td> <td>BE, failure</td>      <td>BE</td>     </tr>
119c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * <tr> <td>UTF-16LE</td> <td>LE, failure</td>      <td>LE, includes BOM</td> <td>LE</td>     </tr>
1203784ab1a0eafa37f1181df2815075300ab1c60f6Elliott Hughes * <tr> <td>UTF-16</td>   <td>BE</td>               <td>LE</td>               <td>BE</td>     </tr>
121c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * </table>
122c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes *
123c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * <h4>Charset names</h4>
124c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * <p>A charset has a canonical name, returned by {@link #name}. Most charsets will
125c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * also have one or more aliases, returned by {@link #aliases}. A charset can be looked up
126c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * by canonical name or any of its aliases using {@link #forName}.
127c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes *
128c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * <h4>Guaranteed-available charsets</h4>
129c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * <p>The following charsets are available on every Java implementation:
130c60bc1815dca549f3fb4e572f6aac749d7fa9fc6Elliott Hughes * <ul>
131c60bc1815dca549f3fb4e572f6aac749d7fa9fc6Elliott Hughes * <li>ISO-8859-1
132c60bc1815dca549f3fb4e572f6aac749d7fa9fc6Elliott Hughes * <li>US-ASCII
133c60bc1815dca549f3fb4e572f6aac749d7fa9fc6Elliott Hughes * <li>UTF-16
134c60bc1815dca549f3fb4e572f6aac749d7fa9fc6Elliott Hughes * <li>UTF-16BE
135c60bc1815dca549f3fb4e572f6aac749d7fa9fc6Elliott Hughes * <li>UTF-16LE
136c60bc1815dca549f3fb4e572f6aac749d7fa9fc6Elliott Hughes * <li>UTF-8
137c60bc1815dca549f3fb4e572f6aac749d7fa9fc6Elliott Hughes * </ul>
138c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * <p>All of these charsets support both decoding and encoding. The charsets whose names begin
139c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * "UTF" can represent all characters, as mentioned above. The "ISO-8859-1" and "US-ASCII" charsets
140c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * can only represent small subsets of these characters. Except when required to do otherwise for
141c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * compatibility, new code should use one of the UTF charsets listed above. The platform's default
142c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * charset is UTF-8. (This is in contrast to some older implementations, where the default charset
143c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes * depended on the user's locale.)
144c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes *
 * <p>Most implementations will support hundreds of charsets. Use {@link #availableCharsets} or
 * {@link #isSupported} to see what's available. If you simply want to use a charset whenever
 * it is available, call {@link #forName} directly and catch the exceptions it throws when the
 * charset isn't available.
149c60bc1815dca549f3fb4e572f6aac749d7fa9fc6Elliott Hughes *
 * <p>Additional charsets can be made available by configuring one or more charset
 * providers through provider configuration files. Such files are always named
 * "java.nio.charset.spi.CharsetProvider" and located in the
 * "META-INF/services" directory of one or more classpath entries. The files should be
 * encoded in "UTF-8". Each line of their content specifies the class name of a
 * charset provider which extends {@link java.nio.charset.spi.CharsetProvider}.
 * A line should end with '\r', '\n' or '\r\n'. Leading and trailing whitespace
 * is trimmed. Blank lines are ignored, as are lines that (after trimming) start with "#",
 * which are regarded as comments. Duplicates of names already found are also
 * ignored. Both the configuration files and the provider classes are loaded through
 * {@link java.util.ServiceLoader} with a {@code null} class loader argument, which selects the
 * system class loader.
161eaa2ff09069424b0f7a95c7cd831cef1b744fe67Jesse Wilson *
 * <p>Although this class is thread-safe, the {@link CharsetDecoder} and {@link CharsetEncoder}
 * instances it returns are inherently stateful.
164adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project */
165adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Projectpublic abstract class Charset implements Comparable<Charset> {
166cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes    private static final HashMap<String, Charset> CACHED_CHARSETS = new HashMap<String, Charset>();
167c903e6720bbbf6540c29f141bd2fa559813ea20aElliott Hughes
168d0628c5cb80e3c1270634c56a784329a4836b9aaElliott Hughes    private static final Charset DEFAULT_CHARSET = getDefaultCharset();
169d0628c5cb80e3c1270634c56a784329a4836b9aaElliott Hughes
170adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project    private final String canonicalName;
171adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project
172adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project    private final HashSet<String> aliasesSet;
173adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project
/**
 * Constructs a {@code Charset} with the given canonical name and aliases. Every name is
 * validated before it is stored, and duplicated aliases are ignored.
 *
 * @param canonicalName
 *            the canonical name of the charset.
 * @param aliases
 *            an array containing all aliases of the charset. May be null, in which case
 *            the charset has no aliases.
 * @throws IllegalCharsetNameException
 *             on an illegal value being supplied for either
 *             {@code canonicalName} or for any element of
 *             {@code aliases}.
 * @throws NullPointerException
 *             if {@code canonicalName} or any element of {@code aliases} is null
 *             (the name check dereferences the name).
 */
protected Charset(String canonicalName, String[] aliases) {
    // Validate first: an illegal canonical name must abort construction before any state is set.
    checkCharsetName(canonicalName);
    this.canonicalName = canonicalName;

    // Collect the aliases in a set so that repeated entries collapse into one.
    this.aliasesSet = new HashSet<String>();
    if (aliases != null) {
        for (String alias : aliases) {
            checkCharsetName(alias);
            this.aliasesSet.add(alias);
        }
    }
}
200adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project
201adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project    private static void checkCharsetName(String name) {
202c903e6720bbbf6540c29f141bd2fa559813ea20aElliott Hughes        if (name.isEmpty()) {
203adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project            throw new IllegalCharsetNameException(name);
204adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project        }
205adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project        int length = name.length();
206cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes        for (int i = 0; i < length; ++i) {
207cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes            if (!isValidCharsetNameCharacter(name.charAt(i))) {
208adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project                throw new IllegalCharsetNameException(name);
209adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project            }
210adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project        }
211adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project    }
212adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project
213cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes    private static boolean isValidCharsetNameCharacter(char c) {
214cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes        return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') ||
215cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes                c == '-' || c == '.' || c == ':' || c == '_';
216ccb8b92211a3e87acaf6486c8d4423c2053b8b5eElliott Hughes    }
217ccb8b92211a3e87acaf6486c8d4423c2053b8b5eElliott Hughes
218adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project    /**
219ccb8b92211a3e87acaf6486c8d4423c2053b8b5eElliott Hughes     * Returns an immutable case-insensitive map from canonical names to {@code Charset} instances.
220ccb8b92211a3e87acaf6486c8d4423c2053b8b5eElliott Hughes     * If multiple charsets have the same canonical name, it is unspecified which is returned in
221ccb8b92211a3e87acaf6486c8d4423c2053b8b5eElliott Hughes     * the map. This method may be slow. If you know which charset you're looking for, use
222ccb8b92211a3e87acaf6486c8d4423c2053b8b5eElliott Hughes     * {@link #forName}.
223ccb8b92211a3e87acaf6486c8d4423c2053b8b5eElliott Hughes     * @return an immutable case-insensitive map from canonical names to {@code Charset} instances
224adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project     */
225adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project    public static SortedMap<String, Charset> availableCharsets() {
226ccb8b92211a3e87acaf6486c8d4423c2053b8b5eElliott Hughes        // Start with a copy of the built-in charsets...
227ccb8b92211a3e87acaf6486c8d4423c2053b8b5eElliott Hughes        TreeMap<String, Charset> charsets = new TreeMap<String, Charset>(String.CASE_INSENSITIVE_ORDER);
228cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes        for (String charsetName : NativeConverter.getAvailableCharsetNames()) {
229cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes            Charset charset = NativeConverter.charsetForName(charsetName);
230cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes            charsets.put(charset.name(), charset);
231adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project        }
232adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project
233cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes        // Add all charsets provided by all charset providers...
234cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes        for (CharsetProvider charsetProvider : ServiceLoader.load(CharsetProvider.class, null)) {
235cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes            Iterator<Charset> it = charsetProvider.charsets();
236cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes            while (it.hasNext()) {
237cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes                Charset cs = it.next();
238cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes                // A CharsetProvider can't override a built-in Charset.
239cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes                if (!charsets.containsKey(cs.name())) {
240cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes                    charsets.put(cs.name(), cs);
241adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project                }
242adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project            }
243adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project        }
244adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project
245cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes        return Collections.unmodifiableSortedMap(charsets);
246adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project    }
247adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project
248cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes    private static Charset cacheCharset(String charsetName, Charset cs) {
249cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes        synchronized (CACHED_CHARSETS) {
250cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes            // Get the canonical name for this charset, and the canonical instance from the table.
251cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes            String canonicalName = cs.name();
252cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes            Charset canonicalCharset = CACHED_CHARSETS.get(canonicalName);
253cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes            if (canonicalCharset == null) {
254cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes                canonicalCharset = cs;
255cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes            }
256bcf7c66e617ad0c33bb320184bb2401def517342Elliott Hughes
257cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes            // Cache the charset by its canonical name...
258cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes            CACHED_CHARSETS.put(canonicalName, canonicalCharset);
259c903e6720bbbf6540c29f141bd2fa559813ea20aElliott Hughes
260cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes            // And the name the user used... (Section 1.4 of http://unicode.org/reports/tr22/ means
261cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes            // that many non-alias, non-canonical names are valid. For example, "utf8" isn't an
262cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes            // alias of the canonical name "UTF-8", but we shouldn't penalize consistent users of
263cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes            // such names unduly.)
264cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes            CACHED_CHARSETS.put(charsetName, canonicalCharset);
265c903e6720bbbf6540c29f141bd2fa559813ea20aElliott Hughes
266cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes            // And all its aliases...
267cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes            for (String alias : cs.aliasesSet) {
268cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes                CACHED_CHARSETS.put(alias, canonicalCharset);
269cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes            }
270c903e6720bbbf6540c29f141bd2fa559813ea20aElliott Hughes
271cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes            return canonicalCharset;
272adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project        }
273adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project    }
274adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project
275adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project    /**
276cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes     * Returns a {@code Charset} instance for the named charset.
277f33eae7e84eb6d3b0f4e86b59605bb3de73009f3Elliott Hughes     *
278cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes     * @param charsetName a charset name (either canonical or an alias)
279adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project     * @throws IllegalCharsetNameException
280adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project     *             if the specified charset name is illegal.
281adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project     * @throws UnsupportedCharsetException
282adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project     *             if the desired charset is not supported by this runtime.
283adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project     */
284eaa2ff09069424b0f7a95c7cd831cef1b744fe67Jesse Wilson    public static Charset forName(String charsetName) {
285cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes        // Is this charset in our cache?
286cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes        Charset cs;
287cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes        synchronized (CACHED_CHARSETS) {
288cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes            cs = CACHED_CHARSETS.get(charsetName);
289cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes            if (cs != null) {
290cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes                return cs;
291cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes            }
292cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes        }
293cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes
294cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes        // Is this a built-in charset supported by ICU?
295cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes        if (charsetName == null) {
296cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes            throw new IllegalCharsetNameException(charsetName);
297cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes        }
298cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes        checkCharsetName(charsetName);
299cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes        cs = NativeConverter.charsetForName(charsetName);
300c903e6720bbbf6540c29f141bd2fa559813ea20aElliott Hughes        if (cs != null) {
301cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes            return cacheCharset(charsetName, cs);
302cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes        }
303cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes
304cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes        // Does a configured CharsetProvider have this charset?
305cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes        for (CharsetProvider charsetProvider : ServiceLoader.load(CharsetProvider.class, null)) {
306cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes            cs = charsetProvider.charsetForName(charsetName);
307cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes            if (cs != null) {
308cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes                return cacheCharset(charsetName, cs);
309cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes            }
310adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project        }
311cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes
312c903e6720bbbf6540c29f141bd2fa559813ea20aElliott Hughes        throw new UnsupportedCharsetException(charsetName);
313adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project    }
314adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project
315adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project    /**
3165cd6df2f627e06f9b7f714181d70d3148a3d6c60Elliott Hughes     * Equivalent to {@code forName} but only throws {@code UnsupportedEncodingException},
3175cd6df2f627e06f9b7f714181d70d3148a3d6c60Elliott Hughes     * which is all pre-nio code claims to throw.
3185cd6df2f627e06f9b7f714181d70d3148a3d6c60Elliott Hughes     *
3195cd6df2f627e06f9b7f714181d70d3148a3d6c60Elliott Hughes     * @hide
3205cd6df2f627e06f9b7f714181d70d3148a3d6c60Elliott Hughes     */
3215cd6df2f627e06f9b7f714181d70d3148a3d6c60Elliott Hughes    public static Charset forNameUEE(String charsetName) throws UnsupportedEncodingException {
3225cd6df2f627e06f9b7f714181d70d3148a3d6c60Elliott Hughes        try {
3235cd6df2f627e06f9b7f714181d70d3148a3d6c60Elliott Hughes            return Charset.forName(charsetName);
3245cd6df2f627e06f9b7f714181d70d3148a3d6c60Elliott Hughes        } catch (Exception cause) {
3255cd6df2f627e06f9b7f714181d70d3148a3d6c60Elliott Hughes            UnsupportedEncodingException ex = new UnsupportedEncodingException(charsetName);
3265cd6df2f627e06f9b7f714181d70d3148a3d6c60Elliott Hughes            ex.initCause(cause);
3275cd6df2f627e06f9b7f714181d70d3148a3d6c60Elliott Hughes            throw ex;
3285cd6df2f627e06f9b7f714181d70d3148a3d6c60Elliott Hughes        }
3295cd6df2f627e06f9b7f714181d70d3148a3d6c60Elliott Hughes    }
3305cd6df2f627e06f9b7f714181d70d3148a3d6c60Elliott Hughes
3315cd6df2f627e06f9b7f714181d70d3148a3d6c60Elliott Hughes    /**
332adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project     * Determines whether the specified charset is supported by this runtime.
333f33eae7e84eb6d3b0f4e86b59605bb3de73009f3Elliott Hughes     *
334adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project     * @param charsetName
335adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project     *            the name of the charset.
336adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project     * @return true if the specified charset is supported, otherwise false.
337adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project     * @throws IllegalCharsetNameException
338adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project     *             if the specified charset name is illegal.
339adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project     */
340c903e6720bbbf6540c29f141bd2fa559813ea20aElliott Hughes    public static boolean isSupported(String charsetName) {
341c903e6720bbbf6540c29f141bd2fa559813ea20aElliott Hughes        try {
342c903e6720bbbf6540c29f141bd2fa559813ea20aElliott Hughes            Charset cs = forName(charsetName);
343c903e6720bbbf6540c29f141bd2fa559813ea20aElliott Hughes            return true;
344c903e6720bbbf6540c29f141bd2fa559813ea20aElliott Hughes        } catch (UnsupportedCharsetException ex) {
345c903e6720bbbf6540c29f141bd2fa559813ea20aElliott Hughes            return false;
346c903e6720bbbf6540c29f141bd2fa559813ea20aElliott Hughes        }
347adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project    }
348adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project
/**
 * Determines whether this charset is a superset of the given charset. A charset C1 contains
 * charset C2 if every character representable by C2 is also representable by C1. This means
 * that lossless conversion is possible from C2 to C1 (but not necessarily the other way
 * round). It does <i>not</i> imply that the two charsets use the same byte sequences for the
 * characters they share.
 *
 * <p>Note that this method is allowed to be conservative, and some implementations may return
 * false when this charset does contain the other charset. Android's implementation is precise,
 * and will always return true in such cases.
 *
 * @param charset
 *            the charset to test for containment in this charset.
 * @return true if this charset is a superset of the given charset,
 *         false if that is unknown or this charset is not a superset of
 *         the given charset.
 */
public abstract boolean contains(Charset charset);
367adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project
/**
 * Returns a new instance of an encoder for this charset.
 *
 * <p>The convenience {@code encode} methods of this class obtain their encoder through
 * this method.
 *
 * @return a new instance of an encoder for this charset.
 */
public abstract CharsetEncoder newEncoder();
374adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project
/**
 * Returns a new instance of a decoder for this charset.
 *
 * <p>The convenience {@code decode} method of this class obtains its decoder through
 * this method.
 *
 * @return a new instance of a decoder for this charset.
 */
public abstract CharsetDecoder newDecoder();
381adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project
382adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project    /**
383adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project     * Gets the canonical name of this charset.
384f33eae7e84eb6d3b0f4e86b59605bb3de73009f3Elliott Hughes     *
385adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project     * @return this charset's name in canonical form.
386adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project     */
387adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project    public final String name() {
388adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project        return this.canonicalName;
389adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project    }
390adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project
391adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project    /**
392adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project     * Gets the set of this charset's aliases.
393f33eae7e84eb6d3b0f4e86b59605bb3de73009f3Elliott Hughes     *
394adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project     * @return an unmodifiable set of this charset's aliases.
395adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project     */
396adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project    public final Set<String> aliases() {
397adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project        return Collections.unmodifiableSet(this.aliasesSet);
398adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project    }
399adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project
400adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project    /**
401adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project     * Gets the name of this charset for the default locale.
402f33eae7e84eb6d3b0f4e86b59605bb3de73009f3Elliott Hughes     *
403eaa2ff09069424b0f7a95c7cd831cef1b744fe67Jesse Wilson     * <p>The default implementation returns the canonical name of this charset.
404eaa2ff09069424b0f7a95c7cd831cef1b744fe67Jesse Wilson     * Subclasses may return a localized display name.
405eaa2ff09069424b0f7a95c7cd831cef1b744fe67Jesse Wilson     *
406adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project     * @return the name of this charset for the default locale.
407adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project     */
408adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project    public String displayName() {
409adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project        return this.canonicalName;
410adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project    }
411adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project
412adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project    /**
413adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project     * Gets the name of this charset for the specified locale.
414eaa2ff09069424b0f7a95c7cd831cef1b744fe67Jesse Wilson     *
415eaa2ff09069424b0f7a95c7cd831cef1b744fe67Jesse Wilson     * <p>The default implementation returns the canonical name of this charset.
416eaa2ff09069424b0f7a95c7cd831cef1b744fe67Jesse Wilson     * Subclasses may return a localized display name.
417eaa2ff09069424b0f7a95c7cd831cef1b744fe67Jesse Wilson     *
418adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project     * @param l
419adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project     *            a certain locale
420eaa2ff09069424b0f7a95c7cd831cef1b744fe67Jesse Wilson     * @return the name of this charset for the specified locale
421adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project     */
422adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project    public String displayName(Locale l) {
423adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project        return this.canonicalName;
424adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project    }
425adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project
426adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project    /**
427adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project     * Indicates whether this charset is known to be registered in the IANA
428adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project     * Charset Registry.
429f33eae7e84eb6d3b0f4e86b59605bb3de73009f3Elliott Hughes     *
430adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project     * @return true if the charset is known to be registered, otherwise returns
431adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project     *         false.
432adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project     */
433adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project    public final boolean isRegistered() {
434cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes        return !canonicalName.startsWith("x-") && !canonicalName.startsWith("X-");
435adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project    }
436adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project
437adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project    /**
438adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project     * Returns true if this charset supports encoding, false otherwise.
439f33eae7e84eb6d3b0f4e86b59605bb3de73009f3Elliott Hughes     *
440adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project     * @return true if this charset supports encoding, false otherwise.
441adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project     */
442adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project    public boolean canEncode() {
443adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project        return true;
444adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project    }
445adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project
446adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project    /**
447c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes     * Returns a new {@code ByteBuffer} containing the bytes encoding the characters from
448c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes     * {@code buffer}.
449c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes     * This method uses {@code CodingErrorAction.REPLACE}.
450c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes     *
451c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes     * <p>Applications should generally create a {@link CharsetEncoder} using {@link #newEncoder}
452c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes     * for performance.
453eaa2ff09069424b0f7a95c7cd831cef1b744fe67Jesse Wilson     *
454adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project     * @param buffer
455adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project     *            the character buffer containing the content to be encoded.
456adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project     * @return the result of the encoding.
457adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project     */
458eaa2ff09069424b0f7a95c7cd831cef1b744fe67Jesse Wilson    public final ByteBuffer encode(CharBuffer buffer) {
459adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project        try {
460c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes            return newEncoder()
461eaa2ff09069424b0f7a95c7cd831cef1b744fe67Jesse Wilson                    .onMalformedInput(CodingErrorAction.REPLACE)
462eaa2ff09069424b0f7a95c7cd831cef1b744fe67Jesse Wilson                    .onUnmappableCharacter(CodingErrorAction.REPLACE).encode(
463eaa2ff09069424b0f7a95c7cd831cef1b744fe67Jesse Wilson                            buffer);
464adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project        } catch (CharacterCodingException ex) {
465adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project            throw new Error(ex.getMessage(), ex);
466adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project        }
467adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project    }
468adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project
469adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project    /**
470c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes     * Returns a new {@code ByteBuffer} containing the bytes encoding the characters from {@code s}.
471c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes     * This method uses {@code CodingErrorAction.REPLACE}.
472eaa2ff09069424b0f7a95c7cd831cef1b744fe67Jesse Wilson     *
473c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes     * <p>Applications should generally create a {@link CharsetEncoder} using {@link #newEncoder}
474c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes     * for performance.
475c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes     *
476c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes     * @param s the string to be encoded.
477adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project     * @return the result of the encoding.
478adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project     */
479adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project    public final ByteBuffer encode(String s) {
480adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project        return encode(CharBuffer.wrap(s));
481adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project    }
482adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project
483adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project    /**
484c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes     * Returns a new {@code CharBuffer} containing the characters decoded from {@code buffer}.
485c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes     * This method uses {@code CodingErrorAction.REPLACE}.
486c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes     *
487c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes     * <p>Applications should generally create a {@link CharsetDecoder} using {@link #newDecoder}
488c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes     * for performance.
489f33eae7e84eb6d3b0f4e86b59605bb3de73009f3Elliott Hughes     *
490adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project     * @param buffer
491adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project     *            the byte buffer containing the content to be decoded.
492adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project     * @return a character buffer containing the output of the decoding.
493adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project     */
494adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project    public final CharBuffer decode(ByteBuffer buffer) {
495adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project        try {
496c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes            return newDecoder()
497eaa2ff09069424b0f7a95c7cd831cef1b744fe67Jesse Wilson                    .onMalformedInput(CodingErrorAction.REPLACE)
498c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes                    .onUnmappableCharacter(CodingErrorAction.REPLACE).decode(buffer);
499adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project        } catch (CharacterCodingException ex) {
500adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project            throw new Error(ex.getMessage(), ex);
501adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project        }
502adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project    }
503adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project
504adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project    /*
505adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project     * -------------------------------------------------------------------
506adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project     * Methods implementing parent interface Comparable
507adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project     * -------------------------------------------------------------------
508adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project     */
509adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project
510adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project    /**
511c89c180eb85cc0392c3a6c2eb4803594478e665cElliott Hughes     * Compares this charset with the given charset. This comparison is
512adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project     * based on the case insensitive canonical names of the charsets.
513f33eae7e84eb6d3b0f4e86b59605bb3de73009f3Elliott Hughes     *
514adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project     * @param charset
515adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project     *            the given object to be compared with.
516adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project     * @return a negative integer if less than the given object, a positive
517adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project     *         integer if larger than it, or 0 if equal to it.
518adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project     */
519adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project    public final int compareTo(Charset charset) {
520adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project        return this.canonicalName.compareToIgnoreCase(charset.canonicalName);
521adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project    }
522adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project
523adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project    /*
524adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project     * -------------------------------------------------------------------
525adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project     * Methods overriding parent class Object
526adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project     * -------------------------------------------------------------------
527adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project     */
528adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project
529adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project    /**
530adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project     * Determines whether this charset equals to the given object. They are
531adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project     * considered to be equal if they have the same canonical name.
532f33eae7e84eb6d3b0f4e86b59605bb3de73009f3Elliott Hughes     *
533adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project     * @param obj
534adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project     *            the given object to be compared with.
535adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project     * @return true if they have the same canonical name, otherwise false.
536adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project     */
537adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project    @Override
538adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project    public final boolean equals(Object obj) {
539adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project        if (obj instanceof Charset) {
540adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project            Charset that = (Charset) obj;
541adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project            return this.canonicalName.equals(that.canonicalName);
542adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project        }
543adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project        return false;
544adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project    }
545adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project
546adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project    /**
547adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project     * Gets the hash code of this charset.
548f33eae7e84eb6d3b0f4e86b59605bb3de73009f3Elliott Hughes     *
549adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project     * @return the hash code of this charset.
550adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project     */
551adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project    @Override
552adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project    public final int hashCode() {
553adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project        return this.canonicalName.hashCode();
554adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project    }
555adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project
556adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project    /**
557adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project     * Gets a string representation of this charset. Usually this contains the
558adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project     * canonical name of the charset.
559f33eae7e84eb6d3b0f4e86b59605bb3de73009f3Elliott Hughes     *
560adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project     * @return a string representation of this charset.
561adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project     */
562adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project    @Override
563adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project    public final String toString() {
564cb8d09e94846d073ee7b50bef89c0b33113697fbElliott Hughes        return getClass().getName() + "[" + this.canonicalName + "]";
565adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project    }
566adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project
567adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project    /**
568c903e6720bbbf6540c29f141bd2fa559813ea20aElliott Hughes     * Returns the system's default charset. This is determined during VM startup, and will not
569c903e6720bbbf6540c29f141bd2fa559813ea20aElliott Hughes     * change thereafter. On Android, the default charset is UTF-8.
570adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project     */
571adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project    public static Charset defaultCharset() {
572d0628c5cb80e3c1270634c56a784329a4836b9aaElliott Hughes        return DEFAULT_CHARSET;
573d0628c5cb80e3c1270634c56a784329a4836b9aaElliott Hughes    }
574d0628c5cb80e3c1270634c56a784329a4836b9aaElliott Hughes
575d0628c5cb80e3c1270634c56a784329a4836b9aaElliott Hughes    private static Charset getDefaultCharset() {
576c903e6720bbbf6540c29f141bd2fa559813ea20aElliott Hughes        String encoding = AccessController.doPrivileged(new PrivilegedAction<String>() {
577c903e6720bbbf6540c29f141bd2fa559813ea20aElliott Hughes            public String run() {
578c903e6720bbbf6540c29f141bd2fa559813ea20aElliott Hughes                return System.getProperty("file.encoding", "UTF-8");
579c903e6720bbbf6540c29f141bd2fa559813ea20aElliott Hughes            }
580c903e6720bbbf6540c29f141bd2fa559813ea20aElliott Hughes        });
581adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project        try {
582c903e6720bbbf6540c29f141bd2fa559813ea20aElliott Hughes            return Charset.forName(encoding);
583adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project        } catch (UnsupportedCharsetException e) {
584c903e6720bbbf6540c29f141bd2fa559813ea20aElliott Hughes            return Charset.forName("UTF-8");
585adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project        }
586adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project    }
587adc854b798c1cfe3bfd4c27d68d5cee38ca617daThe Android Open Source Project}
588