StringCoding.java revision 89ca22d995205228f526db2a3c50e99e866fe9cf
1/*
2 * Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26package java.lang;
27
28import java.io.UnsupportedEncodingException;
29import java.lang.ref.SoftReference;
30import java.nio.ByteBuffer;
31import java.nio.CharBuffer;
32import java.nio.charset.Charset;
33import java.nio.charset.CharsetDecoder;
34import java.nio.charset.CharsetEncoder;
35import java.nio.charset.CharacterCodingException;
36import java.nio.charset.CoderResult;
37import java.nio.charset.CodingErrorAction;
38import java.nio.charset.IllegalCharsetNameException;
39import java.nio.charset.UnsupportedCharsetException;
40import java.util.Arrays;
41import sun.misc.MessageUtils;
42import sun.nio.cs.HistoricallyNamedCharset;
43import sun.nio.cs.ArrayDecoder;
44import sun.nio.cs.ArrayEncoder;
45
46/**
47 * Utility class for string encoding and decoding.
48 */
49
50class StringCoding {
51
52    private StringCoding() { }
53
54    /** The cached coders for each thread */
55    private final static ThreadLocal<SoftReference<StringDecoder>> decoder =
56        new ThreadLocal<>();
57    private final static ThreadLocal<SoftReference<StringEncoder>> encoder =
58        new ThreadLocal<>();
59
60    private static boolean warnUnsupportedCharset = true;
61
62    private static <T> T deref(ThreadLocal<SoftReference<T>> tl) {
63        SoftReference<T> sr = tl.get();
64        if (sr == null)
65            return null;
66        return sr.get();
67    }
68
69    private static <T> void set(ThreadLocal<SoftReference<T>> tl, T ob) {
70        tl.set(new SoftReference<T>(ob));
71    }
72
73    // Trim the given byte array to the given length
74    //
75    private static byte[] safeTrim(byte[] ba, int len, Charset cs, boolean isTrusted) {
76
77        /* ----- BEGIN android -----
78        if (len == ba.length && (isTrusted || System.getSecurityManager() == null))
79        // Libcore tests expect a defensive copy in pretty much all cases.
80        // + System.getSecurityManager() == null is always true on android
81        */
82        if (len == ba.length && (isTrusted))
83            return ba;
84        else
85            return Arrays.copyOf(ba, len);
86    }
87
88    // Trim the given char array to the given length
89    //
90    private static char[] safeTrim(char[] ca, int len,
91                                   Charset cs, boolean isTrusted) {
92        /* ----- BEGIN android -----
93        if (len == ca.length && (isTrusted || System.getSecurityManager() == null))
94        // Libcore tests expect a defensive copy in pretty much all cases.
95        // + System.getSecurityManager() == null is always true on android
96        */
97        if (len == ca.length && (isTrusted))
98            return ca;
99        else
100            return Arrays.copyOf(ca, len);
101    }
102
103    private static int scale(int len, float expansionFactor) {
104        // We need to perform double, not float, arithmetic; otherwise
105        // we lose low order bits when len is larger than 2**24.
106        return (int)(len * (double)expansionFactor);
107    }
108
109    private static Charset lookupCharset(String csn) {
110        if (Charset.isSupported(csn)) {
111            try {
112                return Charset.forName(csn);
113            } catch (UnsupportedCharsetException x) {
114                throw new Error(x);
115            }
116        }
117        return null;
118    }
119
120    private static void warnUnsupportedCharset(String csn) {
121        if (warnUnsupportedCharset) {
122            // Use sun.misc.MessageUtils rather than the Logging API or
123            // System.err since this method may be called during VM
124            // initialization before either is available.
125            MessageUtils.err("WARNING: Default charset " + csn +
126                             " not supported, using ISO-8859-1 instead");
127            warnUnsupportedCharset = false;
128        }
129    }
130
131
132    // -- Decoding --
133    private static class StringDecoder {
134        private final String requestedCharsetName;
135        private final Charset cs;
136        private final CharsetDecoder cd;
137        private final boolean isTrusted;
138
139        private StringDecoder(Charset cs, String rcn) {
140            this.requestedCharsetName = rcn;
141            this.cs = cs;
142            this.cd = cs.newDecoder()
143                .onMalformedInput(CodingErrorAction.REPLACE)
144                .onUnmappableCharacter(CodingErrorAction.REPLACE);
145            this.isTrusted = (cs.getClass().getClassLoader() == null);
146        }
147
148        String charsetName() {
149            if (cs instanceof HistoricallyNamedCharset)
150                return ((HistoricallyNamedCharset)cs).historicalName();
151            return cs.name();
152        }
153
154        final String requestedCharsetName() {
155            return requestedCharsetName;
156        }
157
158        char[] decode(byte[] ba, int off, int len) {
159            int en = scale(len, cd.maxCharsPerByte());
160            char[] ca = new char[en];
161            if (len == 0)
162                return ca;
163            if (cd instanceof ArrayDecoder) {
164                int clen = ((ArrayDecoder)cd).decode(ba, off, len, ca);
165                return safeTrim(ca, clen, cs, isTrusted);
166            } else {
167                cd.reset();
168                ByteBuffer bb = ByteBuffer.wrap(ba, off, len);
169                CharBuffer cb = CharBuffer.wrap(ca);
170                try {
171                    CoderResult cr = cd.decode(bb, cb, true);
172                    if (!cr.isUnderflow())
173                        cr.throwException();
174                    cr = cd.flush(cb);
175                    if (!cr.isUnderflow())
176                        cr.throwException();
177                } catch (CharacterCodingException x) {
178                    // Substitution is always enabled,
179                    // so this shouldn't happen
180                    throw new Error(x);
181                }
182                return safeTrim(ca, cb.position(), cs, isTrusted);
183            }
184        }
185    }
186
187    static char[] decode(String charsetName, byte[] ba, int off, int len)
188        throws UnsupportedEncodingException
189    {
190        StringDecoder sd = deref(decoder);
191        String csn = (charsetName == null) ? "ISO-8859-1" : charsetName;
192        if ((sd == null) || !(csn.equals(sd.requestedCharsetName())
193                              || csn.equals(sd.charsetName()))) {
194            sd = null;
195            try {
196                Charset cs = lookupCharset(csn);
197                if (cs != null)
198                    sd = new StringDecoder(cs, csn);
199            } catch (IllegalCharsetNameException x) {}
200            if (sd == null)
201                throw new UnsupportedEncodingException(csn);
202            set(decoder, sd);
203        }
204        return sd.decode(ba, off, len);
205    }
206
207    static char[] decode(Charset cs, byte[] ba, int off, int len) {
208        // (1)We never cache the "external" cs, the only benefit of creating
209        // an additional StringDe/Encoder object to wrap it is to share the
210        // de/encode() method. These SD/E objects are short-lifed, the young-gen
211        // gc should be able to take care of them well. But the best approash
212        // is still not to generate them if not really necessary.
213        // (2)The defensive copy of the input byte/char[] has a big performance
214        // impact, as well as the outgoing result byte/char[]. Need to do the
215        // optimization check of (sm==null && classLoader0==null) for both.
216        // (3)getClass().getClassLoader0() is expensive
217        // (4)There might be a timing gap in isTrusted setting. getClassLoader0()
218        // is only chcked (and then isTrusted gets set) when (SM==null). It is
219        // possible that the SM==null for now but then SM is NOT null later
220        // when safeTrim() is invoked...the "safe" way to do is to redundant
221        // check (... && (isTrusted || SM == null || getClassLoader0())) in trim
222        // but it then can be argued that the SM is null when the opertaion
223        // is started...
224        CharsetDecoder cd = cs.newDecoder();
225        int en = scale(len, cd.maxCharsPerByte());
226        char[] ca = new char[en];
227        if (len == 0)
228            return ca;
229        boolean isTrusted = false;
230        if (System.getSecurityManager() != null) {
231            if (!(isTrusted = (cs.getClass().getClassLoader() == null))) {
232                ba =  Arrays.copyOfRange(ba, off, off + len);
233                off = 0;
234            }
235        }
236        cd.onMalformedInput(CodingErrorAction.REPLACE)
237          .onUnmappableCharacter(CodingErrorAction.REPLACE)
238          .reset();
239        if (cd instanceof ArrayDecoder) {
240            int clen = ((ArrayDecoder)cd).decode(ba, off, len, ca);
241            return safeTrim(ca, clen, cs, isTrusted);
242        } else {
243            ByteBuffer bb = ByteBuffer.wrap(ba, off, len);
244            CharBuffer cb = CharBuffer.wrap(ca);
245            try {
246                CoderResult cr = cd.decode(bb, cb, true);
247                if (!cr.isUnderflow())
248                    cr.throwException();
249                cr = cd.flush(cb);
250                if (!cr.isUnderflow())
251                    cr.throwException();
252            } catch (CharacterCodingException x) {
253                // Substitution is always enabled,
254                // so this shouldn't happen
255                throw new Error(x);
256            }
257            return safeTrim(ca, cb.position(), cs, isTrusted);
258        }
259    }
260
261    static char[] decode(byte[] ba, int off, int len) {
262        String csn = Charset.defaultCharset().name();
263        try {
264            // use charset name decode() variant which provides caching.
265            return decode(csn, ba, off, len);
266        } catch (UnsupportedEncodingException x) {
267            warnUnsupportedCharset(csn);
268        }
269        try {
270            return decode("ISO-8859-1", ba, off, len);
271        } catch (UnsupportedEncodingException x) {
272            // If this code is hit during VM initialization, MessageUtils is
273            // the only way we will be able to get any kind of error message.
274            MessageUtils.err("ISO-8859-1 charset not available: "
275                             + x.toString());
276            // If we can not find ISO-8859-1 (a required encoding) then things
277            // are seriously wrong with the installation.
278            System.exit(1);
279            return null;
280        }
281    }
282
283    // -- Encoding --
284    private static class StringEncoder {
285        private Charset cs;
286        private CharsetEncoder ce;
287        private final String requestedCharsetName;
288        private final boolean isTrusted;
289
290        private StringEncoder(Charset cs, String rcn) {
291            this.requestedCharsetName = rcn;
292            this.cs = cs;
293            this.ce = cs.newEncoder()
294                .onMalformedInput(CodingErrorAction.REPLACE)
295                .onUnmappableCharacter(CodingErrorAction.REPLACE);
296            this.isTrusted = (cs.getClass().getClassLoader() == null);
297        }
298
299        String charsetName() {
300            if (cs instanceof HistoricallyNamedCharset)
301                return ((HistoricallyNamedCharset)cs).historicalName();
302            return cs.name();
303        }
304
305        final String requestedCharsetName() {
306            return requestedCharsetName;
307        }
308
309        byte[] encode(char[] ca, int off, int len) {
310            int en = scale(len, ce.maxBytesPerChar());
311            byte[] ba = new byte[en];
312            if (len == 0)
313                return ba;
314            if (ce instanceof ArrayEncoder) {
315                int blen = ((ArrayEncoder)ce).encode(ca, off, len, ba);
316                return safeTrim(ba, blen, cs, isTrusted);
317            } else {
318                ce.reset();
319                ByteBuffer bb = ByteBuffer.wrap(ba);
320                CharBuffer cb = CharBuffer.wrap(ca, off, len);
321                try {
322                    /* ----- BEGIN android -----
323                    CoderResult cr = ce.encode(cb, bb, true);
324                    Pass read-only buffer, so the encoder can't alter it */
325                    CoderResult cr = ce.encode(cb.asReadOnlyBuffer(), bb, true);
326                    if (!cr.isUnderflow())
327                        cr.throwException();
328                    cr = ce.flush(bb);
329                    if (!cr.isUnderflow())
330                        cr.throwException();
331                } catch (CharacterCodingException x) {
332                    // Substitution is always enabled,
333                    // so this shouldn't happen
334                    throw new Error(x);
335                }
336                return safeTrim(ba, bb.position(), cs, isTrusted);
337            }
338        }
339    }
340
341    static byte[] encode(String charsetName, char[] ca, int off, int len)
342        throws UnsupportedEncodingException
343    {
344        StringEncoder se = deref(encoder);
345        String csn = (charsetName == null) ? "ISO-8859-1" : charsetName;
346        if ((se == null) || !(csn.equals(se.requestedCharsetName())
347                              || csn.equals(se.charsetName()))) {
348            se = null;
349            try {
350                Charset cs = lookupCharset(csn);
351                if (cs != null)
352                    se = new StringEncoder(cs, csn);
353            } catch (IllegalCharsetNameException x) {}
354            if (se == null)
355                throw new UnsupportedEncodingException (csn);
356            set(encoder, se);
357        }
358        return se.encode(ca, off, len);
359    }
360
361    static byte[] encode(Charset cs, char[] ca, int off, int len) {
362        CharsetEncoder ce = cs.newEncoder();
363        int en = scale(len, ce.maxBytesPerChar());
364        byte[] ba = new byte[en];
365        if (len == 0)
366            return ba;
367        boolean isTrusted = false;
368        if (System.getSecurityManager() != null) {
369            if (!(isTrusted = (cs.getClass().getClassLoader() == null))) {
370                ca =  Arrays.copyOfRange(ca, off, off + len);
371                off = 0;
372            }
373        }
374        ce.onMalformedInput(CodingErrorAction.REPLACE)
375          .onUnmappableCharacter(CodingErrorAction.REPLACE)
376          .reset();
377        if (ce instanceof ArrayEncoder) {
378            int blen = ((ArrayEncoder)ce).encode(ca, off, len, ba);
379            return safeTrim(ba, blen, cs, isTrusted);
380        } else {
381            ByteBuffer bb = ByteBuffer.wrap(ba);
382            CharBuffer cb = CharBuffer.wrap(ca, off, len);
383            try {
384                /* ----- BEGIN android -----
385                   CoderResult cr = ce.encode(cb, bb, true);
386                   Pass read-only buffer, so the encoder can't alter it */
387                CoderResult cr = ce.encode(cb.asReadOnlyBuffer(), bb, true);
388                if (!cr.isUnderflow())
389                    cr.throwException();
390                cr = ce.flush(bb);
391                if (!cr.isUnderflow())
392                    cr.throwException();
393            } catch (CharacterCodingException x) {
394                throw new Error(x);
395            }
396            return safeTrim(ba, bb.position(), cs, isTrusted);
397        }
398    }
399
400    static byte[] encode(char[] ca, int off, int len) {
401        String csn = Charset.defaultCharset().name();
402        try {
403            // use charset name encode() variant which provides caching.
404            return encode(csn, ca, off, len);
405        } catch (UnsupportedEncodingException x) {
406            warnUnsupportedCharset(csn);
407        }
408        try {
409            return encode("ISO-8859-1", ca, off, len);
410        } catch (UnsupportedEncodingException x) {
411            // If this code is hit during VM initialization, MessageUtils is
412            // the only way we will be able to get any kind of error message.
413            MessageUtils.err("ISO-8859-1 charset not available: "
414                             + x.toString());
415            // If we can not find ISO-8859-1 (a required encoding) then things
416            // are seriously wrong with the installation.
417            System.exit(1);
418            return null;
419        }
420    }
421}
422