CharsetUtil.java revision 7dbeb7d91c7b3970426af6debe48301ba053fd79
1/****************************************************************
2 * Licensed to the Apache Software Foundation (ASF) under one   *
3 * or more contributor license agreements.  See the NOTICE file *
4 * distributed with this work for additional information        *
5 * regarding copyright ownership.  The ASF licenses this file   *
6 * to you under the Apache License, Version 2.0 (the            *
7 * "License"); you may not use this file except in compliance   *
8 * with the License.  You may obtain a copy of the License at   *
9 *                                                              *
10 *   http://www.apache.org/licenses/LICENSE-2.0                 *
11 *                                                              *
12 * Unless required by applicable law or agreed to in writing,   *
13 * software distributed under the License is distributed on an  *
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
15 * KIND, either express or implied.  See the License for the    *
16 * specific language governing permissions and limitations      *
17 * under the License.                                           *
18 ****************************************************************/
19
20package org.apache.james.mime4j.util;
21
22import java.io.UnsupportedEncodingException;
23import java.nio.charset.IllegalCharsetNameException;
24import java.nio.charset.UnsupportedCharsetException;
25import java.util.HashMap;
26import java.util.TreeSet;
27
28import org.apache.commons.logging.Log;
29import org.apache.commons.logging.LogFactory;
30
31/**
32 * Utility class for working with character sets. It is somewhat similar to
33 * the Java 1.4 <code>java.nio.charset.Charset</code> class but knows many
34 * more aliases and is compatible with Java 1.3. It will use a simple detection
 * mechanism to detect which character sets the current VM supports. The
 * supported character sets will be a subset of those listed in the
37 * <a href="http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html">
38 * Java 1.5 (J2SE5.0) Supported Encodings</a> document.
39 * <p>
40 * The <a href="http://www.iana.org/assignments/character-sets">
41 * IANA Character Sets</a> document has been used to determine the preferred
42 * MIME character set names and to get a list of known aliases.
43 * <p>
44 * This is a complete list of the character sets known to this class:
45 * <table>
46 *     <tr>
47 *         <td>Canonical (Java) name</td>
48 *         <td>MIME preferred</td>
49 *         <td>Aliases</td>
50 *     </tr>
51 *     <tr>
52 *         <td>ASCII</td>
53 *         <td>US-ASCII</td>
54 *         <td>ANSI_X3.4-1968 iso-ir-6 ANSI_X3.4-1986 ISO_646.irv:1991 ISO646-US us IBM367 cp367 csASCII ascii7 646 iso_646.irv:1983 </td>
55 *     </tr>
56 *     <tr>
57 *         <td>Big5</td>
58 *         <td>Big5</td>
59 *         <td>csBig5 CN-Big5 BIG-FIVE BIGFIVE </td>
60 *     </tr>
61 *     <tr>
62 *         <td>Big5_HKSCS</td>
63 *         <td>Big5-HKSCS</td>
64 *         <td>big5hkscs </td>
65 *     </tr>
66 *     <tr>
67 *         <td>Big5_Solaris</td>
68 *         <td>?</td>
69 *         <td></td>
70 *     </tr>
71 *     <tr>
72 *         <td>Cp037</td>
73 *         <td>IBM037</td>
74 *         <td>ebcdic-cp-us ebcdic-cp-ca ebcdic-cp-wt ebcdic-cp-nl csIBM037 </td>
75 *     </tr>
76 *     <tr>
77 *         <td>Cp1006</td>
78 *         <td>?</td>
79 *         <td></td>
80 *     </tr>
81 *     <tr>
82 *         <td>Cp1025</td>
83 *         <td>?</td>
84 *         <td></td>
85 *     </tr>
86 *     <tr>
87 *         <td>Cp1026</td>
88 *         <td>IBM1026</td>
89 *         <td>csIBM1026 </td>
90 *     </tr>
91 *     <tr>
92 *         <td>Cp1046</td>
93 *         <td>?</td>
94 *         <td></td>
95 *     </tr>
96 *     <tr>
97 *         <td>Cp1047</td>
98 *         <td>IBM1047</td>
99 *         <td>IBM-1047 </td>
100 *     </tr>
101 *     <tr>
102 *         <td>Cp1097</td>
103 *         <td>?</td>
104 *         <td></td>
105 *     </tr>
106 *     <tr>
107 *         <td>Cp1098</td>
108 *         <td>?</td>
109 *         <td></td>
110 *     </tr>
111 *     <tr>
112 *         <td>Cp1112</td>
113 *         <td>?</td>
114 *         <td></td>
115 *     </tr>
116 *     <tr>
117 *         <td>Cp1122</td>
118 *         <td>?</td>
119 *         <td></td>
120 *     </tr>
121 *     <tr>
122 *         <td>Cp1123</td>
123 *         <td>?</td>
124 *         <td></td>
125 *     </tr>
126 *     <tr>
127 *         <td>Cp1124</td>
128 *         <td>?</td>
129 *         <td></td>
130 *     </tr>
131 *     <tr>
132 *         <td>Cp1140</td>
133 *         <td>IBM01140</td>
134 *         <td>CCSID01140 CP01140 ebcdic-us-37+euro </td>
135 *     </tr>
136 *     <tr>
137 *         <td>Cp1141</td>
138 *         <td>IBM01141</td>
139 *         <td>CCSID01141 CP01141 ebcdic-de-273+euro </td>
140 *     </tr>
141 *     <tr>
142 *         <td>Cp1142</td>
143 *         <td>IBM01142</td>
144 *         <td>CCSID01142 CP01142 ebcdic-dk-277+euro ebcdic-no-277+euro </td>
145 *     </tr>
146 *     <tr>
147 *         <td>Cp1143</td>
148 *         <td>IBM01143</td>
149 *         <td>CCSID01143 CP01143 ebcdic-fi-278+euro ebcdic-se-278+euro </td>
150 *     </tr>
151 *     <tr>
152 *         <td>Cp1144</td>
153 *         <td>IBM01144</td>
154 *         <td>CCSID01144 CP01144 ebcdic-it-280+euro </td>
155 *     </tr>
156 *     <tr>
157 *         <td>Cp1145</td>
158 *         <td>IBM01145</td>
159 *         <td>CCSID01145 CP01145 ebcdic-es-284+euro </td>
160 *     </tr>
161 *     <tr>
162 *         <td>Cp1146</td>
163 *         <td>IBM01146</td>
164 *         <td>CCSID01146 CP01146 ebcdic-gb-285+euro </td>
165 *     </tr>
166 *     <tr>
167 *         <td>Cp1147</td>
168 *         <td>IBM01147</td>
169 *         <td>CCSID01147 CP01147 ebcdic-fr-297+euro </td>
170 *     </tr>
171 *     <tr>
172 *         <td>Cp1148</td>
173 *         <td>IBM01148</td>
174 *         <td>CCSID01148 CP01148 ebcdic-international-500+euro </td>
175 *     </tr>
176 *     <tr>
177 *         <td>Cp1149</td>
178 *         <td>IBM01149</td>
179 *         <td>CCSID01149 CP01149 ebcdic-is-871+euro </td>
180 *     </tr>
181 *     <tr>
182 *         <td>Cp1250</td>
183 *         <td>windows-1250</td>
184 *         <td></td>
185 *     </tr>
186 *     <tr>
187 *         <td>Cp1251</td>
188 *         <td>windows-1251</td>
189 *         <td></td>
190 *     </tr>
191 *     <tr>
192 *         <td>Cp1252</td>
193 *         <td>windows-1252</td>
194 *         <td></td>
195 *     </tr>
196 *     <tr>
197 *         <td>Cp1253</td>
198 *         <td>windows-1253</td>
199 *         <td></td>
200 *     </tr>
201 *     <tr>
202 *         <td>Cp1254</td>
203 *         <td>windows-1254</td>
204 *         <td></td>
205 *     </tr>
206 *     <tr>
207 *         <td>Cp1255</td>
208 *         <td>windows-1255</td>
209 *         <td></td>
210 *     </tr>
211 *     <tr>
212 *         <td>Cp1256</td>
213 *         <td>windows-1256</td>
214 *         <td></td>
215 *     </tr>
216 *     <tr>
217 *         <td>Cp1257</td>
218 *         <td>windows-1257</td>
219 *         <td></td>
220 *     </tr>
221 *     <tr>
222 *         <td>Cp1258</td>
223 *         <td>windows-1258</td>
224 *         <td></td>
225 *     </tr>
226 *     <tr>
227 *         <td>Cp1381</td>
228 *         <td>?</td>
229 *         <td></td>
230 *     </tr>
231 *     <tr>
232 *         <td>Cp1383</td>
233 *         <td>?</td>
234 *         <td></td>
235 *     </tr>
236 *     <tr>
237 *         <td>Cp273</td>
238 *         <td>IBM273</td>
239 *         <td>csIBM273 </td>
240 *     </tr>
241 *     <tr>
242 *         <td>Cp277</td>
243 *         <td>IBM277</td>
244 *         <td>EBCDIC-CP-DK EBCDIC-CP-NO csIBM277 </td>
245 *     </tr>
246 *     <tr>
247 *         <td>Cp278</td>
248 *         <td>IBM278</td>
249 *         <td>CP278 ebcdic-cp-fi ebcdic-cp-se csIBM278 </td>
250 *     </tr>
251 *     <tr>
252 *         <td>Cp280</td>
253 *         <td>IBM280</td>
254 *         <td>ebcdic-cp-it csIBM280 </td>
255 *     </tr>
256 *     <tr>
257 *         <td>Cp284</td>
258 *         <td>IBM284</td>
259 *         <td>ebcdic-cp-es csIBM284 </td>
260 *     </tr>
261 *     <tr>
262 *         <td>Cp285</td>
263 *         <td>IBM285</td>
264 *         <td>ebcdic-cp-gb csIBM285 </td>
265 *     </tr>
266 *     <tr>
267 *         <td>Cp297</td>
268 *         <td>IBM297</td>
269 *         <td>ebcdic-cp-fr csIBM297 </td>
270 *     </tr>
271 *     <tr>
272 *         <td>Cp33722</td>
273 *         <td>?</td>
274 *         <td></td>
275 *     </tr>
276 *     <tr>
277 *         <td>Cp420</td>
278 *         <td>IBM420</td>
279 *         <td>ebcdic-cp-ar1 csIBM420 </td>
280 *     </tr>
281 *     <tr>
282 *         <td>Cp424</td>
283 *         <td>IBM424</td>
284 *         <td>ebcdic-cp-he csIBM424 </td>
285 *     </tr>
286 *     <tr>
287 *         <td>Cp437</td>
288 *         <td>IBM437</td>
289 *         <td>437 csPC8CodePage437 </td>
290 *     </tr>
291 *     <tr>
292 *         <td>Cp500</td>
293 *         <td>IBM500</td>
294 *         <td>ebcdic-cp-be ebcdic-cp-ch csIBM500 </td>
295 *     </tr>
296 *     <tr>
297 *         <td>Cp737</td>
298 *         <td>?</td>
299 *         <td></td>
300 *     </tr>
301 *     <tr>
302 *         <td>Cp775</td>
303 *         <td>IBM775</td>
304 *         <td>csPC775Baltic </td>
305 *     </tr>
306 *     <tr>
307 *         <td>Cp838</td>
308 *         <td>IBM-Thai</td>
309 *         <td></td>
310 *     </tr>
311 *     <tr>
312 *         <td>Cp850</td>
313 *         <td>IBM850</td>
314 *         <td>850 csPC850Multilingual </td>
315 *     </tr>
316 *     <tr>
317 *         <td>Cp852</td>
318 *         <td>IBM852</td>
319 *         <td>852 csPCp852 </td>
320 *     </tr>
321 *     <tr>
322 *         <td>Cp855</td>
323 *         <td>IBM855</td>
324 *         <td>855 csIBM855 </td>
325 *     </tr>
326 *     <tr>
327 *         <td>Cp856</td>
328 *         <td>?</td>
329 *         <td></td>
330 *     </tr>
331 *     <tr>
332 *         <td>Cp857</td>
333 *         <td>IBM857</td>
334 *         <td>857 csIBM857 </td>
335 *     </tr>
336 *     <tr>
337 *         <td>Cp858</td>
338 *         <td>IBM00858</td>
339 *         <td>CCSID00858 CP00858 PC-Multilingual-850+euro </td>
340 *     </tr>
341 *     <tr>
342 *         <td>Cp860</td>
343 *         <td>IBM860</td>
344 *         <td>860 csIBM860 </td>
345 *     </tr>
346 *     <tr>
347 *         <td>Cp861</td>
348 *         <td>IBM861</td>
349 *         <td>861 cp-is csIBM861 </td>
350 *     </tr>
351 *     <tr>
352 *         <td>Cp862</td>
353 *         <td>IBM862</td>
354 *         <td>862 csPC862LatinHebrew </td>
355 *     </tr>
356 *     <tr>
357 *         <td>Cp863</td>
358 *         <td>IBM863</td>
359 *         <td>863 csIBM863 </td>
360 *     </tr>
361 *     <tr>
362 *         <td>Cp864</td>
363 *         <td>IBM864</td>
364 *         <td>cp864 csIBM864 </td>
365 *     </tr>
366 *     <tr>
367 *         <td>Cp865</td>
368 *         <td>IBM865</td>
369 *         <td>865 csIBM865 </td>
370 *     </tr>
371 *     <tr>
372 *         <td>Cp866</td>
373 *         <td>IBM866</td>
374 *         <td>866 csIBM866 </td>
375 *     </tr>
376 *     <tr>
377 *         <td>Cp868</td>
378 *         <td>IBM868</td>
379 *         <td>cp-ar csIBM868 </td>
380 *     </tr>
381 *     <tr>
382 *         <td>Cp869</td>
383 *         <td>IBM869</td>
384 *         <td>cp-gr csIBM869 </td>
385 *     </tr>
386 *     <tr>
387 *         <td>Cp870</td>
388 *         <td>IBM870</td>
389 *         <td>ebcdic-cp-roece ebcdic-cp-yu csIBM870 </td>
390 *     </tr>
391 *     <tr>
392 *         <td>Cp871</td>
393 *         <td>IBM871</td>
394 *         <td>ebcdic-cp-is csIBM871 </td>
395 *     </tr>
396 *     <tr>
397 *         <td>Cp875</td>
398 *         <td>?</td>
399 *         <td></td>
400 *     </tr>
401 *     <tr>
402 *         <td>Cp918</td>
403 *         <td>IBM918</td>
404 *         <td>ebcdic-cp-ar2 csIBM918 </td>
405 *     </tr>
406 *     <tr>
407 *         <td>Cp921</td>
408 *         <td>?</td>
409 *         <td></td>
410 *     </tr>
411 *     <tr>
412 *         <td>Cp922</td>
413 *         <td>?</td>
414 *         <td></td>
415 *     </tr>
416 *     <tr>
417 *         <td>Cp930</td>
418 *         <td>?</td>
419 *         <td></td>
420 *     </tr>
421 *     <tr>
422 *         <td>Cp933</td>
423 *         <td>?</td>
424 *         <td></td>
425 *     </tr>
426 *     <tr>
427 *         <td>Cp935</td>
428 *         <td>?</td>
429 *         <td></td>
430 *     </tr>
431 *     <tr>
432 *         <td>Cp937</td>
433 *         <td>?</td>
434 *         <td></td>
435 *     </tr>
436 *     <tr>
437 *         <td>Cp939</td>
438 *         <td>?</td>
439 *         <td></td>
440 *     </tr>
441 *     <tr>
442 *         <td>Cp942</td>
443 *         <td>?</td>
444 *         <td></td>
445 *     </tr>
446 *     <tr>
447 *         <td>Cp942C</td>
448 *         <td>?</td>
449 *         <td></td>
450 *     </tr>
451 *     <tr>
452 *         <td>Cp943</td>
453 *         <td>?</td>
454 *         <td></td>
455 *     </tr>
456 *     <tr>
457 *         <td>Cp943C</td>
458 *         <td>?</td>
459 *         <td></td>
460 *     </tr>
461 *     <tr>
462 *         <td>Cp948</td>
463 *         <td>?</td>
464 *         <td></td>
465 *     </tr>
466 *     <tr>
467 *         <td>Cp949</td>
468 *         <td>?</td>
469 *         <td></td>
470 *     </tr>
471 *     <tr>
472 *         <td>Cp949C</td>
473 *         <td>?</td>
474 *         <td></td>
475 *     </tr>
476 *     <tr>
477 *         <td>Cp950</td>
478 *         <td>?</td>
479 *         <td></td>
480 *     </tr>
481 *     <tr>
482 *         <td>Cp964</td>
483 *         <td>?</td>
484 *         <td></td>
485 *     </tr>
486 *     <tr>
487 *         <td>Cp970</td>
488 *         <td>?</td>
489 *         <td></td>
490 *     </tr>
491 *     <tr>
492 *         <td>EUC_CN</td>
493 *         <td>GB2312</td>
494 *         <td>x-EUC-CN csGB2312 euccn euc-cn gb2312-80 gb2312-1980 CN-GB CN-GB-ISOIR165 </td>
495 *     </tr>
496 *     <tr>
497 *         <td>EUC_JP</td>
498 *         <td>EUC-JP</td>
499 *         <td>csEUCPkdFmtJapanese Extended_UNIX_Code_Packed_Format_for_Japanese eucjis x-eucjp eucjp x-euc-jp </td>
500 *     </tr>
501 *     <tr>
502 *         <td>EUC_JP_LINUX</td>
503 *         <td>?</td>
504 *         <td></td>
505 *     </tr>
506 *     <tr>
507 *         <td>EUC_JP_Solaris</td>
508 *         <td>?</td>
509 *         <td></td>
510 *     </tr>
511 *     <tr>
512 *         <td>EUC_KR</td>
513 *         <td>EUC-KR</td>
514 *         <td>csEUCKR ksc5601 5601 ksc5601_1987 ksc_5601 ksc5601-1987 ks_c_5601-1987 euckr </td>
515 *     </tr>
516 *     <tr>
517 *         <td>EUC_TW</td>
518 *         <td>EUC-TW</td>
519 *         <td>x-EUC-TW cns11643 euctw </td>
520 *     </tr>
521 *     <tr>
522 *         <td>GB18030</td>
523 *         <td>GB18030</td>
524 *         <td>gb18030-2000 </td>
525 *     </tr>
526 *     <tr>
527 *         <td>GBK</td>
528 *         <td>windows-936</td>
529 *         <td>CP936 MS936 ms_936 x-mswin-936 </td>
530 *     </tr>
531 *     <tr>
532 *         <td>ISCII91</td>
533 *         <td>?</td>
534 *         <td>x-ISCII91 iscii </td>
535 *     </tr>
536 *     <tr>
537 *         <td>ISO2022CN</td>
538 *         <td>ISO-2022-CN</td>
539 *         <td></td>
540 *     </tr>
541 *     <tr>
542 *         <td>ISO2022JP</td>
543 *         <td>ISO-2022-JP</td>
544 *         <td>csISO2022JP JIS jis_encoding csjisencoding </td>
545 *     </tr>
546 *     <tr>
547 *         <td>ISO2022KR</td>
548 *         <td>ISO-2022-KR</td>
549 *         <td>csISO2022KR </td>
550 *     </tr>
551 *     <tr>
552 *         <td>ISO2022_CN_CNS</td>
553 *         <td>?</td>
554 *         <td></td>
555 *     </tr>
556 *     <tr>
557 *         <td>ISO2022_CN_GB</td>
558 *         <td>?</td>
559 *         <td></td>
560 *     </tr>
561 *     <tr>
562 *         <td>ISO8859_1</td>
563 *         <td>ISO-8859-1</td>
564 *         <td>ISO_8859-1:1987 iso-ir-100 ISO_8859-1 latin1 l1 IBM819 CP819 csISOLatin1 8859_1 819 IBM-819 ISO8859-1 ISO_8859_1 </td>
565 *     </tr>
566 *     <tr>
567 *         <td>ISO8859_13</td>
568 *         <td>ISO-8859-13</td>
569 *         <td></td>
570 *     </tr>
571 *     <tr>
572 *         <td>ISO8859_15</td>
573 *         <td>ISO-8859-15</td>
574 *         <td>ISO_8859-15 Latin-9 8859_15 csISOlatin9 IBM923 cp923 923 L9 IBM-923 ISO8859-15 LATIN9 LATIN0 csISOlatin0 ISO8859_15_FDIS </td>
575 *     </tr>
576 *     <tr>
577 *         <td>ISO8859_2</td>
578 *         <td>ISO-8859-2</td>
579 *         <td>ISO_8859-2:1987 iso-ir-101 ISO_8859-2 latin2 l2 csISOLatin2 8859_2 iso8859_2 </td>
580 *     </tr>
581 *     <tr>
582 *         <td>ISO8859_3</td>
583 *         <td>ISO-8859-3</td>
584 *         <td>ISO_8859-3:1988 iso-ir-109 ISO_8859-3 latin3 l3 csISOLatin3 8859_3 </td>
585 *     </tr>
586 *     <tr>
587 *         <td>ISO8859_4</td>
588 *         <td>ISO-8859-4</td>
589 *         <td>ISO_8859-4:1988 iso-ir-110 ISO_8859-4 latin4 l4 csISOLatin4 8859_4 </td>
590 *     </tr>
591 *     <tr>
592 *         <td>ISO8859_5</td>
593 *         <td>ISO-8859-5</td>
594 *         <td>ISO_8859-5:1988 iso-ir-144 ISO_8859-5 cyrillic csISOLatinCyrillic 8859_5 </td>
595 *     </tr>
596 *     <tr>
597 *         <td>ISO8859_6</td>
598 *         <td>ISO-8859-6</td>
599 *         <td>ISO_8859-6:1987 iso-ir-127 ISO_8859-6 ECMA-114 ASMO-708 arabic csISOLatinArabic 8859_6 </td>
600 *     </tr>
601 *     <tr>
602 *         <td>ISO8859_7</td>
603 *         <td>ISO-8859-7</td>
604 *         <td>ISO_8859-7:1987 iso-ir-126 ISO_8859-7 ELOT_928 ECMA-118 greek greek8 csISOLatinGreek 8859_7 sun_eu_greek </td>
605 *     </tr>
606 *     <tr>
607 *         <td>ISO8859_8</td>
608 *         <td>ISO-8859-8</td>
609 *         <td>ISO_8859-8:1988 iso-ir-138 ISO_8859-8 hebrew csISOLatinHebrew 8859_8 </td>
610 *     </tr>
611 *     <tr>
612 *         <td>ISO8859_9</td>
613 *         <td>ISO-8859-9</td>
614 *         <td>ISO_8859-9:1989 iso-ir-148 ISO_8859-9 latin5 l5 csISOLatin5 8859_9 </td>
615 *     </tr>
616 *     <tr>
617 *         <td>JISAutoDetect</td>
618 *         <td>?</td>
619 *         <td></td>
620 *     </tr>
621 *     <tr>
622 *         <td>JIS_C6626-1983</td>
623 *         <td>JIS_C6626-1983</td>
624 *         <td>x-JIS0208 JIS0208 csISO87JISX0208 x0208 JIS_X0208-1983 iso-ir-87 </td>
625 *     </tr>
626 *     <tr>
627 *         <td>JIS_X0201</td>
628 *         <td>JIS_X0201</td>
629 *         <td>X0201 JIS0201 csHalfWidthKatakana </td>
630 *     </tr>
631 *     <tr>
632 *         <td>JIS_X0212-1990</td>
633 *         <td>JIS_X0212-1990</td>
634 *         <td>iso-ir-159 x0212 JIS0212 csISO159JISX02121990 </td>
635 *     </tr>
636 *     <tr>
637 *         <td>KOI8_R</td>
638 *         <td>KOI8-R</td>
639 *         <td>csKOI8R koi8 </td>
640 *     </tr>
641 *     <tr>
642 *         <td>MS874</td>
643 *         <td>windows-874</td>
644 *         <td>cp874 </td>
645 *     </tr>
646 *     <tr>
647 *         <td>MS932</td>
648 *         <td>Windows-31J</td>
649 *         <td>windows-932 csWindows31J x-ms-cp932 </td>
650 *     </tr>
651 *     <tr>
652 *         <td>MS949</td>
653 *         <td>windows-949</td>
654 *         <td>windows949 ms_949 x-windows-949 </td>
655 *     </tr>
656 *     <tr>
657 *         <td>MS950</td>
658 *         <td>windows-950</td>
659 *         <td>x-windows-950 </td>
660 *     </tr>
661 *     <tr>
662 *         <td>MS950_HKSCS</td>
663 *         <td></td>
664 *         <td></td>
665 *     </tr>
666 *     <tr>
667 *         <td>MacArabic</td>
668 *         <td>?</td>
669 *         <td></td>
670 *     </tr>
671 *     <tr>
672 *         <td>MacCentralEurope</td>
673 *         <td>?</td>
674 *         <td></td>
675 *     </tr>
676 *     <tr>
677 *         <td>MacCroatian</td>
678 *         <td>?</td>
679 *         <td></td>
680 *     </tr>
681 *     <tr>
682 *         <td>MacCyrillic</td>
683 *         <td>?</td>
684 *         <td></td>
685 *     </tr>
686 *     <tr>
687 *         <td>MacDingbat</td>
688 *         <td>?</td>
689 *         <td></td>
690 *     </tr>
691 *     <tr>
692 *         <td>MacGreek</td>
693 *         <td>MacGreek</td>
694 *         <td></td>
695 *     </tr>
696 *     <tr>
697 *         <td>MacHebrew</td>
698 *         <td>?</td>
699 *         <td></td>
700 *     </tr>
701 *     <tr>
702 *         <td>MacIceland</td>
703 *         <td>?</td>
704 *         <td></td>
705 *     </tr>
706 *     <tr>
707 *         <td>MacRoman</td>
708 *         <td>MacRoman</td>
709 *         <td>Macintosh MAC csMacintosh </td>
710 *     </tr>
711 *     <tr>
712 *         <td>MacRomania</td>
713 *         <td>?</td>
714 *         <td></td>
715 *     </tr>
716 *     <tr>
717 *         <td>MacSymbol</td>
718 *         <td>?</td>
719 *         <td></td>
720 *     </tr>
721 *     <tr>
722 *         <td>MacThai</td>
723 *         <td>?</td>
724 *         <td></td>
725 *     </tr>
726 *     <tr>
727 *         <td>MacTurkish</td>
728 *         <td>?</td>
729 *         <td></td>
730 *     </tr>
731 *     <tr>
732 *         <td>MacUkraine</td>
733 *         <td>?</td>
734 *         <td></td>
735 *     </tr>
736 *     <tr>
737 *         <td>SJIS</td>
738 *         <td>Shift_JIS</td>
739 *         <td>MS_Kanji csShiftJIS shift-jis x-sjis pck </td>
740 *     </tr>
741 *     <tr>
742 *         <td>TIS620</td>
743 *         <td>TIS-620</td>
744 *         <td></td>
745 *     </tr>
746 *     <tr>
747 *         <td>UTF-16</td>
748 *         <td>UTF-16</td>
749 *         <td>UTF_16 </td>
750 *     </tr>
751 *     <tr>
752 *         <td>UTF8</td>
753 *         <td>UTF-8</td>
754 *         <td></td>
755 *     </tr>
756 *     <tr>
757 *         <td>UnicodeBig</td>
758 *         <td>?</td>
759 *         <td></td>
760 *     </tr>
761 *     <tr>
762 *         <td>UnicodeBigUnmarked</td>
763 *         <td>UTF-16BE</td>
764 *         <td>X-UTF-16BE UTF_16BE ISO-10646-UCS-2 </td>
765 *     </tr>
766 *     <tr>
767 *         <td>UnicodeLittle</td>
768 *         <td>?</td>
769 *         <td></td>
770 *     </tr>
771 *     <tr>
772 *         <td>UnicodeLittleUnmarked</td>
773 *         <td>UTF-16LE</td>
774 *         <td>UTF_16LE X-UTF-16LE </td>
775 *     </tr>
776 *     <tr>
777 *         <td>x-Johab</td>
778 *         <td>johab</td>
779 *         <td>johab cp1361 ms1361 ksc5601-1992 ksc5601_1992 </td>
780 *     </tr>
781 *     <tr>
782 *         <td>x-iso-8859-11</td>
783 *         <td>?</td>
784 *         <td></td>
785 *     </tr>
786 * </table>
787 *
788 *
789 * @version $Id: CharsetUtil.java,v 1.1 2004/10/25 07:26:46 ntherning Exp $
790 */
791public class CharsetUtil {
792    private static Log log = LogFactory.getLog(CharsetUtil.class);
793
794    private static class Charset implements Comparable {
795        private String canonical = null;
796        private String mime = null;
797        private String[] aliases = null;
798
799        private Charset(String canonical, String mime, String[] aliases) {
800            this.canonical = canonical;
801            this.mime = mime;
802            this.aliases = aliases;
803        }
804
805        public int compareTo(Object o) {
806            Charset c = (Charset) o;
807            return this.canonical.compareTo(c.canonical);
808        }
809    }
810
811    private static Charset[] JAVA_CHARSETS = {
812        new Charset("ISO8859_1", "ISO-8859-1",
813                    new String[] {"ISO_8859-1:1987", "iso-ir-100", "ISO_8859-1",
814                                  "latin1", "l1", "IBM819", "CP819",
815                                  "csISOLatin1", "8859_1", "819", "IBM-819",
816                                  "ISO8859-1", "ISO_8859_1"}),
817        new Charset("ISO8859_2", "ISO-8859-2",
818                    new String[] {"ISO_8859-2:1987", "iso-ir-101", "ISO_8859-2",
819                                  "latin2", "l2", "csISOLatin2", "8859_2",
820                                  "iso8859_2"}),
821        new Charset("ISO8859_3", "ISO-8859-3", new String[] {"ISO_8859-3:1988", "iso-ir-109", "ISO_8859-3", "latin3", "l3", "csISOLatin3", "8859_3"}),
822        new Charset("ISO8859_4", "ISO-8859-4",
823                    new String[] {"ISO_8859-4:1988", "iso-ir-110", "ISO_8859-4",
824                                  "latin4", "l4", "csISOLatin4", "8859_4"}),
825        new Charset("ISO8859_5", "ISO-8859-5",
826                    new String[] {"ISO_8859-5:1988", "iso-ir-144", "ISO_8859-5",
827                                  "cyrillic", "csISOLatinCyrillic", "8859_5"}),
828        new Charset("ISO8859_6", "ISO-8859-6", new String[] {"ISO_8859-6:1987", "iso-ir-127", "ISO_8859-6", "ECMA-114", "ASMO-708", "arabic", "csISOLatinArabic", "8859_6"}),
829        new Charset("ISO8859_7", "ISO-8859-7",
830                    new String[] {"ISO_8859-7:1987", "iso-ir-126", "ISO_8859-7",
831                                  "ELOT_928", "ECMA-118", "greek", "greek8",
832                                  "csISOLatinGreek", "8859_7", "sun_eu_greek"}),
833        new Charset("ISO8859_8", "ISO-8859-8", new String[] {"ISO_8859-8:1988", "iso-ir-138", "ISO_8859-8", "hebrew", "csISOLatinHebrew", "8859_8"}),
834        new Charset("ISO8859_9", "ISO-8859-9",
835                    new String[] {"ISO_8859-9:1989", "iso-ir-148", "ISO_8859-9",
836                                  "latin5", "l5", "csISOLatin5", "8859_9"}),
837
838        new Charset("ISO8859_13", "ISO-8859-13", new String[] {}),
839        new Charset("ISO8859_15", "ISO-8859-15",
840                    new String[] {"ISO_8859-15", "Latin-9", "8859_15",
841                                  "csISOlatin9", "IBM923", "cp923", "923", "L9",
842                                  "IBM-923", "ISO8859-15", "LATIN9", "LATIN0",
843                                  "csISOlatin0", "ISO8859_15_FDIS"}),
844        new Charset("KOI8_R", "KOI8-R", new String[] {"csKOI8R", "koi8"}),
845        new Charset("ASCII", "US-ASCII",
846                    new String[] {"ANSI_X3.4-1968", "iso-ir-6",
847                                  "ANSI_X3.4-1986", "ISO_646.irv:1991",
848                                  "ISO646-US", "us", "IBM367", "cp367",
849                                  "csASCII", "ascii7", "646", "iso_646.irv:1983"}),
850        new Charset("UTF8", "UTF-8", new String[] {}),
851        new Charset("UTF-16", "UTF-16", new String[] {"UTF_16"}),
852        new Charset("UnicodeBigUnmarked", "UTF-16BE", new String[] {"X-UTF-16BE", "UTF_16BE", "ISO-10646-UCS-2"}),
853        new Charset("UnicodeLittleUnmarked", "UTF-16LE", new String[] {"UTF_16LE", "X-UTF-16LE"}),
854        new Charset("Big5", "Big5", new String[] {"csBig5", "CN-Big5", "BIG-FIVE", "BIGFIVE"}),
855        new Charset("Big5_HKSCS", "Big5-HKSCS", new String[] {"big5hkscs"}),
856        new Charset("EUC_JP", "EUC-JP",
857                    new String[] {"csEUCPkdFmtJapanese",
858                              "Extended_UNIX_Code_Packed_Format_for_Japanese",
859                              "eucjis", "x-eucjp", "eucjp", "x-euc-jp"}),
860        new Charset("EUC_KR", "EUC-KR",
861                    new String[] {"csEUCKR", "ksc5601", "5601", "ksc5601_1987",
862                                  "ksc_5601", "ksc5601-1987", "ks_c_5601-1987",
863                                  "euckr"}),
864        new Charset("GB18030", "GB18030", new String[] {"gb18030-2000"}),
865        new Charset("EUC_CN", "GB2312", new String[] {"x-EUC-CN", "csGB2312", "euccn", "euc-cn", "gb2312-80", "gb2312-1980", "CN-GB", "CN-GB-ISOIR165"}),
866        new Charset("GBK", "windows-936", new String[] {"CP936", "MS936", "ms_936", "x-mswin-936"}),
867
868        new Charset("Cp037", "IBM037", new String[] {"ebcdic-cp-us", "ebcdic-cp-ca", "ebcdic-cp-wt", "ebcdic-cp-nl", "csIBM037"}),
869        new Charset("Cp273", "IBM273", new String[] {"csIBM273"}),
870        new Charset("Cp277", "IBM277", new String[] {"EBCDIC-CP-DK", "EBCDIC-CP-NO", "csIBM277"}),
871        new Charset("Cp278", "IBM278", new String[] {"CP278", "ebcdic-cp-fi", "ebcdic-cp-se", "csIBM278"}),
872        new Charset("Cp280", "IBM280", new String[] {"ebcdic-cp-it", "csIBM280"}),
873        new Charset("Cp284", "IBM284", new String[] {"ebcdic-cp-es", "csIBM284"}),
874        new Charset("Cp285", "IBM285", new String[] {"ebcdic-cp-gb", "csIBM285"}),
875        new Charset("Cp297", "IBM297", new String[] {"ebcdic-cp-fr", "csIBM297"}),
876        new Charset("Cp420", "IBM420", new String[] {"ebcdic-cp-ar1", "csIBM420"}),
877        new Charset("Cp424", "IBM424", new String[] {"ebcdic-cp-he", "csIBM424"}),
878        new Charset("Cp437", "IBM437", new String[] {"437", "csPC8CodePage437"}),
879        new Charset("Cp500", "IBM500", new String[] {"ebcdic-cp-be", "ebcdic-cp-ch", "csIBM500"}),
880        new Charset("Cp775", "IBM775", new String[] {"csPC775Baltic"}),
881        new Charset("Cp838", "IBM-Thai", new String[] {}),
882        new Charset("Cp850", "IBM850", new String[] {"850", "csPC850Multilingual"}),
883        new Charset("Cp852", "IBM852", new String[] {"852", "csPCp852"}),
884        new Charset("Cp855", "IBM855", new String[] {"855", "csIBM855"}),
885        new Charset("Cp857", "IBM857", new String[] {"857", "csIBM857"}),
886        new Charset("Cp858", "IBM00858",
887                new String[] {"CCSID00858", "CP00858",
888                              "PC-Multilingual-850+euro"}),
889        new Charset("Cp860", "IBM860", new String[] {"860", "csIBM860"}),
890        new Charset("Cp861", "IBM861", new String[] {"861", "cp-is", "csIBM861"}),
891        new Charset("Cp862", "IBM862", new String[] {"862", "csPC862LatinHebrew"}),
892        new Charset("Cp863", "IBM863", new String[] {"863", "csIBM863"}),
893        new Charset("Cp864", "IBM864", new String[] {"cp864", "csIBM864"}),
894        new Charset("Cp865", "IBM865", new String[] {"865", "csIBM865"}),
895        new Charset("Cp866", "IBM866", new String[] {"866", "csIBM866"}),
896        new Charset("Cp868", "IBM868", new String[] {"cp-ar", "csIBM868"}),
897        new Charset("Cp869", "IBM869", new String[] {"cp-gr", "csIBM869"}),
898        new Charset("Cp870", "IBM870", new String[] {"ebcdic-cp-roece", "ebcdic-cp-yu", "csIBM870"}),
899        new Charset("Cp871", "IBM871", new String[] {"ebcdic-cp-is", "csIBM871"}),
900        new Charset("Cp918", "IBM918", new String[] {"ebcdic-cp-ar2", "csIBM918"}),
901        new Charset("Cp1026", "IBM1026", new String[] {"csIBM1026"}),
902        new Charset("Cp1047", "IBM1047", new String[] {"IBM-1047"}),
903        new Charset("Cp1140", "IBM01140",
904                    new String[] {"CCSID01140", "CP01140",
905                                  "ebcdic-us-37+euro"}),
906        new Charset("Cp1141", "IBM01141",
907                    new String[] {"CCSID01141", "CP01141",
908                                  "ebcdic-de-273+euro"}),
909        new Charset("Cp1142", "IBM01142", new String[] {"CCSID01142", "CP01142", "ebcdic-dk-277+euro", "ebcdic-no-277+euro"}),
910        new Charset("Cp1143", "IBM01143", new String[] {"CCSID01143", "CP01143", "ebcdic-fi-278+euro", "ebcdic-se-278+euro"}),
911        new Charset("Cp1144", "IBM01144", new String[] {"CCSID01144", "CP01144", "ebcdic-it-280+euro"}),
912        new Charset("Cp1145", "IBM01145", new String[] {"CCSID01145", "CP01145", "ebcdic-es-284+euro"}),
913        new Charset("Cp1146", "IBM01146", new String[] {"CCSID01146", "CP01146", "ebcdic-gb-285+euro"}),
914        new Charset("Cp1147", "IBM01147", new String[] {"CCSID01147", "CP01147", "ebcdic-fr-297+euro"}),
915        new Charset("Cp1148", "IBM01148", new String[] {"CCSID01148", "CP01148", "ebcdic-international-500+euro"}),
916        new Charset("Cp1149", "IBM01149", new String[] {"CCSID01149", "CP01149", "ebcdic-is-871+euro"}),
917        new Charset("Cp1250", "windows-1250", new String[] {}),
918        new Charset("Cp1251", "windows-1251", new String[] {}),
919        new Charset("Cp1252", "windows-1252", new String[] {}),
920        new Charset("Cp1253", "windows-1253", new String[] {}),
921        new Charset("Cp1254", "windows-1254", new String[] {}),
922        new Charset("Cp1255", "windows-1255", new String[] {}),
923        new Charset("Cp1256", "windows-1256", new String[] {}),
924        new Charset("Cp1257", "windows-1257", new String[] {}),
925        new Charset("Cp1258", "windows-1258", new String[] {}),
926        new Charset("ISO2022CN", "ISO-2022-CN", new String[] {}),
927        new Charset("ISO2022JP", "ISO-2022-JP", new String[] {"csISO2022JP", "JIS", "jis_encoding", "csjisencoding"}),
928        new Charset("ISO2022KR", "ISO-2022-KR", new String[] {"csISO2022KR"}),
929        new Charset("JIS_X0201", "JIS_X0201", new String[] {"X0201", "JIS0201", "csHalfWidthKatakana"}),
930        new Charset("JIS_X0212-1990", "JIS_X0212-1990", new String[] {"iso-ir-159", "x0212", "JIS0212", "csISO159JISX02121990"}),
931        new Charset("JIS_C6626-1983", "JIS_C6626-1983", new String[] {"x-JIS0208", "JIS0208", "csISO87JISX0208", "x0208", "JIS_X0208-1983", "iso-ir-87"}),
932        new Charset("SJIS", "Shift_JIS", new String[] {"MS_Kanji", "csShiftJIS", "shift-jis", "x-sjis", "pck"}),
933        new Charset("TIS620", "TIS-620", new String[] {}),
934        new Charset("MS932", "Windows-31J", new String[] {"windows-932", "csWindows31J", "x-ms-cp932"}),
935        new Charset("EUC_TW", "EUC-TW", new String[] {"x-EUC-TW", "cns11643", "euctw"}),
936        new Charset("x-Johab", "johab", new String[] {"johab", "cp1361", "ms1361", "ksc5601-1992", "ksc5601_1992"}),
937        new Charset("MS950_HKSCS", "", new String[] {}),
938        new Charset("MS874", "windows-874", new String[] {"cp874"}),
939        new Charset("MS949", "windows-949", new String[] {"windows949", "ms_949", "x-windows-949"}),
940        new Charset("MS950", "windows-950", new String[] {"x-windows-950"}),
941
942        new Charset("Cp737", null, new String[] {}),
943        new Charset("Cp856", null, new String[] {}),
944        new Charset("Cp875", null, new String[] {}),
945        new Charset("Cp921", null, new String[] {}),
946        new Charset("Cp922", null, new String[] {}),
947        new Charset("Cp930", null, new String[] {}),
948        new Charset("Cp933", null, new String[] {}),
949        new Charset("Cp935", null, new String[] {}),
950        new Charset("Cp937", null, new String[] {}),
951        new Charset("Cp939", null, new String[] {}),
952        new Charset("Cp942", null, new String[] {}),
953        new Charset("Cp942C", null, new String[] {}),
954        new Charset("Cp943", null, new String[] {}),
955        new Charset("Cp943C", null, new String[] {}),
956        new Charset("Cp948", null, new String[] {}),
957        new Charset("Cp949", null, new String[] {}),
958        new Charset("Cp949C", null, new String[] {}),
959        new Charset("Cp950", null, new String[] {}),
960        new Charset("Cp964", null, new String[] {}),
961        new Charset("Cp970", null, new String[] {}),
962        new Charset("Cp1006", null, new String[] {}),
963        new Charset("Cp1025", null, new String[] {}),
964        new Charset("Cp1046", null, new String[] {}),
965        new Charset("Cp1097", null, new String[] {}),
966        new Charset("Cp1098", null, new String[] {}),
967        new Charset("Cp1112", null, new String[] {}),
968        new Charset("Cp1122", null, new String[] {}),
969        new Charset("Cp1123", null, new String[] {}),
970        new Charset("Cp1124", null, new String[] {}),
971        new Charset("Cp1381", null, new String[] {}),
972        new Charset("Cp1383", null, new String[] {}),
973        new Charset("Cp33722", null, new String[] {}),
974        new Charset("Big5_Solaris", null, new String[] {}),
975        new Charset("EUC_JP_LINUX", null, new String[] {}),
976        new Charset("EUC_JP_Solaris", null, new String[] {}),
977        new Charset("ISCII91", null, new String[] {"x-ISCII91", "iscii"}),
978        new Charset("ISO2022_CN_CNS", null, new String[] {}),
979        new Charset("ISO2022_CN_GB", null, new String[] {}),
980        new Charset("x-iso-8859-11", null, new String[] {}),
981        new Charset("JISAutoDetect", null, new String[] {}),
982        new Charset("MacArabic", null, new String[] {}),
983        new Charset("MacCentralEurope", null, new String[] {}),
984        new Charset("MacCroatian", null, new String[] {}),
985        new Charset("MacCyrillic", null, new String[] {}),
986        new Charset("MacDingbat", null, new String[] {}),
987        new Charset("MacGreek", "MacGreek", new String[] {}),
988        new Charset("MacHebrew", null, new String[] {}),
989        new Charset("MacIceland", null, new String[] {}),
990        new Charset("MacRoman", "MacRoman", new String[] {"Macintosh", "MAC", "csMacintosh"}),
991        new Charset("MacRomania", null, new String[] {}),
992        new Charset("MacSymbol", null, new String[] {}),
993        new Charset("MacThai", null, new String[] {}),
994        new Charset("MacTurkish", null, new String[] {}),
995        new Charset("MacUkraine", null, new String[] {}),
996        new Charset("UnicodeBig", null, new String[] {}),
997        new Charset("UnicodeLittle", null, new String[] {})
998    };
999
1000    /**
1001     * Contains the canonical names of character sets which can be used to
1002     * decode bytes into Java chars.
1003     */
1004    private static TreeSet decodingSupported = null;
1005
1006    /**
1007     * Contains the canonical names of character sets which can be used to
1008     * encode Java chars into bytes.
1009     */
1010    private static TreeSet encodingSupported = null;
1011
1012    /**
1013     * Maps character set names to Charset objects. All possible names of
1014     * a charset will be mapped to the Charset.
1015     */
1016    private static HashMap charsetMap = null;
1017
1018    static {
1019        decodingSupported = new TreeSet();
1020        encodingSupported = new TreeSet();
1021        byte[] dummy = new byte[] {'d', 'u', 'm', 'm', 'y'};
1022        for (int i = 0; i < JAVA_CHARSETS.length; i++) {
1023            try {
1024                String s = new String(dummy, JAVA_CHARSETS[i].canonical);
1025                decodingSupported.add(JAVA_CHARSETS[i].canonical.toLowerCase());
1026            } catch (UnsupportedOperationException e) {
1027            } catch (UnsupportedEncodingException e) {
1028            }
1029            try {
1030                "dummy".getBytes(JAVA_CHARSETS[i].canonical);
1031                encodingSupported.add(JAVA_CHARSETS[i].canonical.toLowerCase());
1032            } catch (UnsupportedOperationException e) {
1033            } catch (UnsupportedEncodingException e) {
1034            }
1035        }
1036
1037        charsetMap = new HashMap();
1038        for (int i = 0; i < JAVA_CHARSETS.length; i++) {
1039            Charset c = JAVA_CHARSETS[i];
1040            charsetMap.put(c.canonical.toLowerCase(), c);
1041            if (c.mime != null) {
1042                charsetMap.put(c.mime.toLowerCase(), c);
1043            }
1044            if (c.aliases != null) {
1045                for (int j = 0; j < c.aliases.length; j++) {
1046                    charsetMap.put(c.aliases[j].toLowerCase(), c);
1047                }
1048            }
1049        }
1050
1051        if (log.isDebugEnabled()) {
1052            log.debug("Character sets which support decoding: "
1053                        + decodingSupported);
1054            log.debug("Character sets which support encoding: "
1055                        + encodingSupported);
1056        }
1057    }
1058
1059    /**
1060     * ANDROID:  THE FOLLOWING SET OF STATIC STRINGS ARE COPIED FROM A NEWER VERSION OF MIME4J
1061     */
1062
1063    /** carriage return - line feed sequence */
1064    public static final String CRLF = "\r\n";
1065
1066    /** US-ASCII CR, carriage return (13) */
1067    public static final int CR = '\r';
1068
1069    /** US-ASCII LF, line feed (10) */
1070    public static final int LF = '\n';
1071
1072    /** US-ASCII SP, space (32) */
1073    public static final int SP = ' ';
1074
1075    /** US-ASCII HT, horizontal-tab (9)*/
1076    public static final int HT = '\t';
1077
1078    public static final java.nio.charset.Charset US_ASCII = java.nio.charset.Charset
1079            .forName("US-ASCII");
1080
1081    public static final java.nio.charset.Charset ISO_8859_1 = java.nio.charset.Charset
1082            .forName("ISO-8859-1");
1083
1084    public static final java.nio.charset.Charset UTF_8 = java.nio.charset.Charset
1085            .forName("UTF-8");
1086
1087    /**
1088     * Returns <code>true</code> if the specified character is a whitespace
1089     * character (CR, LF, SP or HT).
1090     *
1091     * ANDROID:  COPIED FROM A NEWER VERSION OF MIME4J
1092     *
1093     * @param ch
1094     *            character to test.
1095     * @return <code>true</code> if the specified character is a whitespace
1096     *         character, <code>false</code> otherwise.
1097     */
1098    public static boolean isWhitespace(char ch) {
1099        return ch == SP || ch == HT || ch == CR || ch == LF;
1100    }
1101
1102    /**
1103     * Returns <code>true</code> if the specified string consists entirely of
1104     * whitespace characters.
1105     *
1106     * ANDROID:  COPIED FROM A NEWER VERSION OF MIME4J
1107     *
1108     * @param s
1109     *            string to test.
1110     * @return <code>true</code> if the specified string consists entirely of
1111     *         whitespace characters, <code>false</code> otherwise.
1112     */
1113    public static boolean isWhitespace(final String s) {
1114        if (s == null) {
1115            throw new IllegalArgumentException("String may not be null");
1116        }
1117        final int len = s.length();
1118        for (int i = 0; i < len; i++) {
1119            if (!isWhitespace(s.charAt(i))) {
1120                return false;
1121            }
1122        }
1123        return true;
1124    }
1125
1126    /**
1127     * Determines if the VM supports encoding (chars to bytes) the
1128     * specified character set. NOTE: the given character set name may
1129     * not be known to the VM even if this method returns <code>true</code>.
1130     * Use {@link #toJavaCharset(String)} to get the canonical Java character
1131     * set name.
1132     *
1133     * @param charsetName the characters set name.
1134     * @return <code>true</code> if encoding is supported, <code>false</code>
1135     *         otherwise.
1136     */
1137    public static boolean isEncodingSupported(String charsetName) {
1138        return encodingSupported.contains(charsetName.toLowerCase());
1139    }
1140
1141    /**
1142     * Determines if the VM supports decoding (bytes to chars) the
1143     * specified character set. NOTE: the given character set name may
1144     * not be known to the VM even if this method returns <code>true</code>.
1145     * Use {@link #toJavaCharset(String)} to get the canonical Java character
1146     * set name.
1147     *
1148     * @param charsetName the characters set name.
1149     * @return <code>true</code> if decoding is supported, <code>false</code>
1150     *         otherwise.
1151     */
1152    public static boolean isDecodingSupported(String charsetName) {
1153        return decodingSupported.contains(charsetName.toLowerCase());
1154    }
1155
1156    /**
1157     * Gets the preferred MIME character set name for the specified
1158     * character set or <code>null</code> if not known.
1159     *
1160     * @param charsetName the character set name to look for.
1161     * @return the MIME preferred name or <code>null</code> if not known.
1162     */
1163    public static String toMimeCharset(String charsetName) {
1164        Charset c = (Charset) charsetMap.get(charsetName.toLowerCase());
1165        if (c != null) {
1166            return c.mime;
1167        }
1168        return null;
1169    }
1170
1171    /**
1172     * Gets the canonical Java character set name for the specified
1173     * character set or <code>null</code> if not known. This should be
1174     * called before doing any conversions using the Java API. NOTE:
1175     * you must use {@link #isEncodingSupported(String)} or
1176     * {@link #isDecodingSupported(String)} to make sure the returned
1177     * Java character set is supported by the current VM.
1178     *
1179     * @param charsetName the character set name to look for.
1180     * @return the canonical Java name or <code>null</code> if not known.
1181     */
1182    public static String toJavaCharset(String charsetName) {
1183        Charset c = (Charset) charsetMap.get(charsetName.toLowerCase());
1184        if (c != null) {
1185            return c.canonical;
1186        }
1187        return null;
1188    }
1189
1190    public static java.nio.charset.Charset getCharset(String charsetName) {
1191        String defaultCharset = "ISO-8859-1";
1192
1193        // Use the default chareset if given charset is null
1194        if(charsetName == null) charsetName = defaultCharset;
1195
1196        try {
1197            return java.nio.charset.Charset.forName(charsetName);
1198        } catch (IllegalCharsetNameException e) {
1199            log.info("Illegal charset " + charsetName + ", fallback to " + defaultCharset + ": " + e);
1200            // Use default charset on exception
1201            return java.nio.charset.Charset.forName(defaultCharset);
1202        } catch (UnsupportedCharsetException ex) {
1203            log.info("Unsupported charset " + charsetName + ", fallback to " + defaultCharset + ": " + ex);
1204            // Use default charset on exception
1205            return java.nio.charset.Charset.forName(defaultCharset);
1206        }
1207
1208    }
1209    /*
1210     * Uncomment the code below and run the main method to regenerate the
1211     * Javadoc table above when the known charsets change.
1212     */
1213
1214    /*
1215    private static String dumpHtmlTable() {
1216        LinkedList l = new LinkedList(Arrays.asList(JAVA_CHARSETS));
1217        Collections.sort(l);
1218        StringBuffer sb = new StringBuffer();
1219        sb.append(" * <table>\n");
1220        sb.append(" *     <tr>\n");
1221        sb.append(" *         <td>Canonical (Java) name</td>\n");
1222        sb.append(" *         <td>MIME preferred</td>\n");
1223        sb.append(" *         <td>Aliases</td>\n");
1224        sb.append(" *     </tr>\n");
1225
1226        for (Iterator it = l.iterator(); it.hasNext();) {
1227            Charset c = (Charset) it.next();
1228            sb.append(" *     <tr>\n");
1229            sb.append(" *         <td>" + c.canonical + "</td>\n");
1230            sb.append(" *         <td>" + (c.mime == null ? "?" : c.mime)+ "</td>\n");
1231            sb.append(" *         <td>");
1232            for (int i = 0; c.aliases != null && i < c.aliases.length; i++) {
1233                sb.append(c.aliases[i] + " ");
1234            }
1235            sb.append("</td>\n");
1236            sb.append(" *     </tr>\n");
1237        }
1238        sb.append(" * </table>\n");
1239        return sb.toString();
1240    }
1241
1242    public static void main(String[] args) {
1243        System.out.println(dumpHtmlTable());
1244    }*/
1245}
1246