1/*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the  "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
9 *
10 *     http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 */
18/*
19 * $Id: Encodings.java 471981 2006-11-07 04:28:00Z minchau $
20 */
21package org.apache.xml.serializer;
22
23import java.io.InputStream;
24import java.io.OutputStream;
25import java.io.OutputStreamWriter;
26import java.io.UnsupportedEncodingException;
27import java.io.Writer;
28import java.util.ArrayList;
29import java.util.Enumeration;
30import java.util.Hashtable;
31import java.util.List;
32import java.util.Properties;
33import java.util.StringTokenizer;
34
35
36/**
 * Provides information about encodings. Depends on the Java runtime
 * to provide writers for the different encodings.
39 * <p>
40 * This class is not a public API. It is only public because it
41 * is used outside of this package.
42 *
43 * @xsl.usage internal
44 */
45
46public final class Encodings extends Object
47{
48    /**
49     * Standard filename for properties file with encodings data.
50     */
51    private static final String ENCODINGS_FILE = SerializerBase.PKG_PATH+"/Encodings.properties";
52
53    /**
54     * Returns a writer for the specified encoding based on
55     * an output stream.
56     * <p>
57     * This is not a public API.
58     * @param output The output stream
59     * @param encoding The encoding MIME name, not a Java name for the encoding.
60     * @return A suitable writer
61     * @throws UnsupportedEncodingException There is no convertor
62     *  to support this encoding
63     * @xsl.usage internal
64     */
65    static Writer getWriter(OutputStream output, String encoding)
66        throws UnsupportedEncodingException
67    {
68
69        for (int i = 0; i < _encodings.length; ++i)
70        {
71            if (_encodings[i].name.equalsIgnoreCase(encoding))
72            {
73                try
74                {
75                    String javaName = _encodings[i].javaName;
76                	OutputStreamWriter osw = new OutputStreamWriter(output,javaName);
77                    return osw;
78                }
79                catch (java.lang.IllegalArgumentException iae) // java 1.1.8
80                {
81                    // keep trying
82                }
83                catch (UnsupportedEncodingException usee)
84                {
85
86                    // keep trying
87                }
88            }
89        }
90
91        try
92        {
93            return new OutputStreamWriter(output, encoding);
94        }
95        catch (java.lang.IllegalArgumentException iae) // java 1.1.8
96        {
97            throw new UnsupportedEncodingException(encoding);
98        }
99    }
100
101    /**
102     * Returns the EncodingInfo object for the specified
103     * encoding, never null, although the encoding name
104     * inside the returned EncodingInfo object will be if
105     * we can't find a "real" EncodingInfo for the encoding.
106     * <p>
107     * This is not a public API.
108     *
109     * @param encoding The encoding
110     * @return The object that is used to determine if
111     * characters are in the given encoding.
112     * @xsl.usage internal
113     */
114    static EncodingInfo getEncodingInfo(String encoding)
115    {
116        EncodingInfo ei;
117
118        String normalizedEncoding = toUpperCaseFast(encoding);
119        ei = (EncodingInfo) _encodingTableKeyJava.get(normalizedEncoding);
120        if (ei == null)
121            ei = (EncodingInfo) _encodingTableKeyMime.get(normalizedEncoding);
122        if (ei == null) {
123            // We shouldn't have to do this, but just in case.
124            ei = new EncodingInfo(null,null, '\u0000');
125        }
126
127        return ei;
128    }
129
130    /**
131     * Determines if the encoding specified was recognized by the
132     * serializer or not.
133     *
134     * @param encoding The encoding
135     * @return boolean - true if the encoding was recognized else false
136     */
137    public static boolean isRecognizedEncoding(String encoding)
138    {
139        EncodingInfo ei;
140
141        String normalizedEncoding = encoding.toUpperCase();
142        ei = (EncodingInfo) _encodingTableKeyJava.get(normalizedEncoding);
143        if (ei == null)
144            ei = (EncodingInfo) _encodingTableKeyMime.get(normalizedEncoding);
145        if (ei != null)
146            return true;
147        return false;
148    }
149
150    /**
151     * A fast and cheap way to uppercase a String that is
152     * only made of printable ASCII characters.
153     * <p>
154     * This is not a public API.
155     * @param s a String of ASCII characters
156     * @return an uppercased version of the input String,
157     * possibly the same String.
158     * @xsl.usage internal
159     */
160    static private String toUpperCaseFast(final String s) {
161
162    	boolean different = false;
163    	final int mx = s.length();
164		char[] chars = new char[mx];
165    	for (int i=0; i < mx; i++) {
166    		char ch = s.charAt(i);
167            // is the character a lower case ASCII one?
168    		if ('a' <= ch && ch <= 'z') {
169                // a cheap and fast way to uppercase that is good enough
170    			ch = (char) (ch + ('A' - 'a'));
171    			different = true; // the uppercased String is different
172    		}
173    		chars[i] = ch;
174    	}
175
176    	// A little optimization, don't call String.valueOf() if
177    	// the uppercased string is the same as the input string.
178    	final String upper;
179    	if (different)
180    		upper = String.valueOf(chars);
181    	else
182    		upper = s;
183
184    	return upper;
185    }
186
    /** The default encoding, ISO style.   */
188    static final String DEFAULT_MIME_ENCODING = "UTF-8";
189
190    /**
191     * Get the proper mime encoding.  From the XSLT recommendation: "The encoding
192     * attribute specifies the preferred encoding to use for outputting the result
193     * tree. XSLT processors are required to respect values of UTF-8 and UTF-16.
194     * For other values, if the XSLT processor does not support the specified
195     * encoding it may signal an error; if it does not signal an error it should
196     * use UTF-8 or UTF-16 instead. The XSLT processor must not use an encoding
197     * whose name does not match the EncName production of the XML Recommendation
198     * [XML]. If no encoding attribute is specified, then the XSLT processor should
199     * use either UTF-8 or UTF-16."
200     * <p>
201     * This is not a public API.
202     *
203     * @param encoding Reference to java-style encoding string, which may be null,
204     * in which case a default will be found.
205     *
206     * @return The ISO-style encoding string, or null if failure.
207     * @xsl.usage internal
208     */
209    static String getMimeEncoding(String encoding)
210    {
211
212        if (null == encoding)
213        {
214            try
215            {
216
217                // Get the default system character encoding.  This may be
218                // incorrect if they passed in a writer, but right now there
219                // seems to be no way to get the encoding from a writer.
220                encoding = System.getProperty("file.encoding", "UTF8");
221
222                if (null != encoding)
223                {
224
225                    /*
226                    * See if the mime type is equal to UTF8.  If you don't
227                    * do that, then  convertJava2MimeEncoding will convert
228                    * 8859_1 to "ISO-8859-1", which is not what we want,
229                    * I think, and I don't think I want to alter the tables
230                    * to convert everything to UTF-8.
231                    */
232                    String jencoding =
233                        (encoding.equalsIgnoreCase("Cp1252")
234                            || encoding.equalsIgnoreCase("ISO8859_1")
235                            || encoding.equalsIgnoreCase("8859_1")
236                            || encoding.equalsIgnoreCase("UTF8"))
237                            ? DEFAULT_MIME_ENCODING
238                            : convertJava2MimeEncoding(encoding);
239
240                    encoding =
241                        (null != jencoding) ? jencoding : DEFAULT_MIME_ENCODING;
242                }
243                else
244                {
245                    encoding = DEFAULT_MIME_ENCODING;
246                }
247            }
248            catch (SecurityException se)
249            {
250                encoding = DEFAULT_MIME_ENCODING;
251            }
252        }
253        else
254        {
255            encoding = convertJava2MimeEncoding(encoding);
256        }
257
258        return encoding;
259    }
260
261    /**
262     * Try the best we can to convert a Java encoding to a XML-style encoding.
263     * <p>
264     * This is not a public API.
265     * @param encoding non-null reference to encoding string, java style.
266     *
267     * @return ISO-style encoding string.
268     * @xsl.usage internal
269     */
270    private static String convertJava2MimeEncoding(String encoding)
271    {
272        EncodingInfo enc =
273            (EncodingInfo) _encodingTableKeyJava.get(toUpperCaseFast(encoding));
274        if (null != enc)
275            return enc.name;
276        return encoding;
277    }
278
279    /**
280     * Try the best we can to convert a Java encoding to a XML-style encoding.
281     * <p>
282     * This is not a public API.
283     *
284     * @param encoding non-null reference to encoding string, java style.
285     *
286     * @return ISO-style encoding string.
287     * <p>
288     * This method is not a public API.
289     * @xsl.usage internal
290     */
291    public static String convertMime2JavaEncoding(String encoding)
292    {
293
294        for (int i = 0; i < _encodings.length; ++i)
295        {
296            if (_encodings[i].name.equalsIgnoreCase(encoding))
297            {
298                return _encodings[i].javaName;
299            }
300        }
301
302        return encoding;
303    }
304
305    /**
306     * Load a list of all the supported encodings.
307     *
308     * System property "encodings" formatted using URL syntax may define an
309     * external encodings list. Thanks to Sergey Ushakov for the code
310     * contribution!
311     * @xsl.usage internal
312     */
313    private static EncodingInfo[] loadEncodingInfo()
314    {
315        try
316        {
317            final InputStream is;
318
319            SecuritySupport ss = SecuritySupport.getInstance();
320            is = ss.getResourceAsStream(ObjectFactory.findClassLoader(),
321                                            ENCODINGS_FILE);
322
323            Properties props = new Properties();
324            if (is != null) {
325                props.load(is);
326                is.close();
327            } else {
328                // Seems to be no real need to force failure here, let the
329                // system do its best... The issue is not really very critical,
330                // and the output will be in any case _correct_ though maybe not
331                // always human-friendly... :)
332                // But maybe report/log the resource problem?
333                // Any standard ways to report/log errors (in static context)?
334            }
335
336            int totalEntries = props.size();
337
338            List encodingInfo_list = new ArrayList();
339            Enumeration keys = props.keys();
340            for (int i = 0; i < totalEntries; ++i)
341            {
342                String javaName = (String) keys.nextElement();
343                String val = props.getProperty(javaName);
344                int len = lengthOfMimeNames(val);
345
346                String mimeName;
347                char highChar;
348                if (len == 0)
349                {
350                    // There is no property value, only the javaName, so try and recover
351                    mimeName = javaName;
352                    highChar = '\u0000'; // don't know the high code point, will need to test every character
353                }
354                else
355                {
356                    try {
357                        // Get the substring after the Mime names
358                        final String highVal = val.substring(len).trim();
359                        highChar = (char) Integer.decode(highVal).intValue();
360                    }
361                    catch( NumberFormatException e) {
362                        highChar = 0;
363                    }
364                    String mimeNames = val.substring(0, len);
365                    StringTokenizer st =
366                        new StringTokenizer(mimeNames, ",");
367                    for (boolean first = true;
368                        st.hasMoreTokens();
369                        first = false)
370                    {
371                        mimeName = st.nextToken();
372                        EncodingInfo ei = new EncodingInfo(mimeName, javaName, highChar);
373                        encodingInfo_list.add(ei);
374                        _encodingTableKeyMime.put(mimeName.toUpperCase(), ei);
375                        if (first)
376                            _encodingTableKeyJava.put(javaName.toUpperCase(), ei);
377                    }
378                }
379            }
380            // Convert the Vector of EncodingInfo objects into an array of them,
381            // as that is the kind of thing this method returns.
382            EncodingInfo[] ret_ei = new EncodingInfo[encodingInfo_list.size()];
383            encodingInfo_list.toArray(ret_ei);
384            return ret_ei;
385        }
386        catch (java.net.MalformedURLException mue)
387        {
388            throw new org.apache.xml.serializer.utils.WrappedRuntimeException(mue);
389        }
390        catch (java.io.IOException ioe)
391        {
392            throw new org.apache.xml.serializer.utils.WrappedRuntimeException(ioe);
393        }
394    }
395
396    /**
397     * Get the length of the Mime names within the property value
398     * @param val The value of the property, which should contain a comma
399     * separated list of Mime names, followed optionally by a space and the
400     * high char value
401     * @return
402     */
403    private static int lengthOfMimeNames(String val) {
404        // look for the space preceding the optional high char
405        int len = val.indexOf(' ');
406        // If len is zero it means the optional part is not there, so
407        // the value must be all Mime names, so set the length appropriately
408        if (len < 0)
409            len = val.length();
410
411        return len;
412    }
413
414    /**
415     * Return true if the character is the high member of a surrogate pair.
416     * <p>
417     * This is not a public API.
418     * @param ch the character to test
419     * @xsl.usage internal
420     */
421    static boolean isHighUTF16Surrogate(char ch) {
422        return ('\uD800' <= ch && ch <= '\uDBFF');
423    }
424    /**
425     * Return true if the character is the low member of a surrogate pair.
426     * <p>
427     * This is not a public API.
428     * @param ch the character to test
429     * @xsl.usage internal
430     */
431    static boolean isLowUTF16Surrogate(char ch) {
432        return ('\uDC00' <= ch && ch <= '\uDFFF');
433    }
434    /**
435     * Return the unicode code point represented by the high/low surrogate pair.
436     * <p>
437     * This is not a public API.
438     * @param highSurrogate the high char of the high/low pair
439     * @param lowSurrogate the low char of the high/low pair
440     * @xsl.usage internal
441     */
442    static int toCodePoint(char highSurrogate, char lowSurrogate) {
443        int codePoint =
444            ((highSurrogate - 0xd800) << 10)
445                + (lowSurrogate - 0xdc00)
446                + 0x10000;
447        return codePoint;
448    }
449    /**
450     * Return the unicode code point represented by the char.
451     * A bit of a dummy method, since all it does is return the char,
452     * but as an int value.
453     * <p>
454     * This is not a public API.
455     * @param ch the char.
456     * @xsl.usage internal
457     */
458    static int toCodePoint(char ch) {
459        int codePoint = ch;
460        return codePoint;
461    }
462
463    /**
464     * Characters with values at or below the high code point are
465     * in the encoding. Code point values above this one may or may
466     * not be in the encoding, but lower ones certainly are.
467     * <p>
468     * This is for performance.
469     *
470     * @param encoding The encoding
471     * @return The code point for which characters at or below this code point
472     * are in the encoding. Characters with higher code point may or may not be
473     * in the encoding. A value of zero is returned if the high code point is unknown.
474     * <p>
475     * This method is not a public API.
476     * @xsl.usage internal
477     */
478    static public char getHighChar(String encoding)
479    {
480        final char highCodePoint;
481        EncodingInfo ei;
482
483        String normalizedEncoding = toUpperCaseFast(encoding);
484        ei = (EncodingInfo) _encodingTableKeyJava.get(normalizedEncoding);
485        if (ei == null)
486            ei = (EncodingInfo) _encodingTableKeyMime.get(normalizedEncoding);
487        if (ei != null)
488            highCodePoint =  ei.getHighChar();
489        else
490            highCodePoint = 0;
491        return highCodePoint;
492    }
493
    // Lookup tables from an upper-cased encoding name to its EncodingInfo,
    // keyed by Java name and by MIME name respectively. Both are filled in
    // by loadEncodingInfo(). Static initializers run in textual order, so
    // these two declarations must stay ahead of _encodings: the tables have
    // to exist before loadEncodingInfo() writes into them.
    private static final Hashtable _encodingTableKeyJava = new Hashtable();
    private static final Hashtable _encodingTableKeyMime = new Hashtable();
    // Every EncodingInfo created by loadEncodingInfo(), one per MIME name;
    // scanned linearly by getWriter() and convertMime2JavaEncoding().
    private static final EncodingInfo[] _encodings = loadEncodingInfo();
497}
498