StringCoding.java revision 2c87ad3a45cecf9e344487cad1abfdebe79f2c7c
1/*
2 * Copyright (C) 2014 The Android Open Source Project
3 * Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 *
6 * This code is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 only, as
8 * published by the Free Software Foundation.  Oracle designates this
9 * particular file as subject to the "Classpath" exception as provided
10 * by Oracle in the LICENSE file that accompanied this code.
11 *
12 * This code is distributed in the hope that it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 * version 2 for more details (a copy is included in the LICENSE file that
16 * accompanied this code).
17 *
18 * You should have received a copy of the GNU General Public License version
19 * 2 along with this work; if not, write to the Free Software Foundation,
20 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
21 *
22 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
23 * or visit www.oracle.com if you need additional information or have any
24 * questions.
25 */
26
27package java.lang;
28
29import java.io.UnsupportedEncodingException;
30import java.lang.ref.SoftReference;
31import java.nio.ByteBuffer;
32import java.nio.CharBuffer;
33import java.nio.charset.Charset;
34import java.nio.charset.CharsetDecoder;
35import java.nio.charset.CharsetEncoder;
36import java.nio.charset.CharacterCodingException;
37import java.nio.charset.CoderResult;
38import java.nio.charset.CodingErrorAction;
39import java.nio.charset.IllegalCharsetNameException;
40import java.nio.charset.UnsupportedCharsetException;
41import java.util.Arrays;
42import sun.misc.MessageUtils;
43import sun.nio.cs.HistoricallyNamedCharset;
44import sun.nio.cs.ArrayDecoder;
45import sun.nio.cs.ArrayEncoder;
46
47/**
48 * Utility class for string encoding and decoding.
49 */
50
51class StringCoding {
52
53    private StringCoding() { }
54
55    /** The cached coders for each thread */
56    private final static ThreadLocal<SoftReference<StringDecoder>> decoder =
57        new ThreadLocal<>();
58    private final static ThreadLocal<SoftReference<StringEncoder>> encoder =
59        new ThreadLocal<>();
60
61    private static boolean warnUnsupportedCharset = true;
62
63    private static <T> T deref(ThreadLocal<SoftReference<T>> tl) {
64        SoftReference<T> sr = tl.get();
65        if (sr == null)
66            return null;
67        return sr.get();
68    }
69
70    private static <T> void set(ThreadLocal<SoftReference<T>> tl, T ob) {
71        tl.set(new SoftReference<T>(ob));
72    }
73
74    // Trim the given byte array to the given length
75    //
76    private static byte[] safeTrim(byte[] ba, int len, Charset cs, boolean isTrusted) {
77
78        /* ----- BEGIN android -----
79        if (len == ba.length && (isTrusted || System.getSecurityManager() == null))
80        // Libcore tests expect a defensive copy in pretty much all cases.
81        // + System.getSecurityManager() == null is always true on android
82        */
83        if (len == ba.length && (isTrusted))
84            return ba;
85        else
86            return Arrays.copyOf(ba, len);
87    }
88
89    // Trim the given char array to the given length
90    //
91    private static char[] safeTrim(char[] ca, int len,
92                                   Charset cs, boolean isTrusted) {
93        /* ----- BEGIN android -----
94        if (len == ca.length && (isTrusted || System.getSecurityManager() == null))
95        // Libcore tests expect a defensive copy in pretty much all cases.
96        // + System.getSecurityManager() == null is always true on android
97        */
98        if (len == ca.length && (isTrusted))
99            return ca;
100        else
101            return Arrays.copyOf(ca, len);
102    }
103
104    private static int scale(int len, float expansionFactor) {
105        // We need to perform double, not float, arithmetic; otherwise
106        // we lose low order bits when len is larger than 2**24.
107        return (int)(len * (double)expansionFactor);
108    }
109
110    private static Charset lookupCharset(String csn) {
111        if (Charset.isSupported(csn)) {
112            try {
113                return Charset.forName(csn);
114            } catch (UnsupportedCharsetException x) {
115                throw new Error(x);
116            }
117        }
118        return null;
119    }
120
121    private static void warnUnsupportedCharset(String csn) {
122        if (warnUnsupportedCharset) {
123            // Use sun.misc.MessageUtils rather than the Logging API or
124            // System.err since this method may be called during VM
125            // initialization before either is available.
126            MessageUtils.err("WARNING: Default charset " + csn +
127                             " not supported, using ISO-8859-1 instead");
128            warnUnsupportedCharset = false;
129        }
130    }
131
132
133    // -- Decoding --
134    private static class StringDecoder {
135        private final String requestedCharsetName;
136        private final Charset cs;
137        private final CharsetDecoder cd;
138        private final boolean isTrusted;
139
140        private StringDecoder(Charset cs, String rcn) {
141            this.requestedCharsetName = rcn;
142            this.cs = cs;
143            this.cd = cs.newDecoder()
144                .onMalformedInput(CodingErrorAction.REPLACE)
145                .onUnmappableCharacter(CodingErrorAction.REPLACE);
146            this.isTrusted = (cs.getClass().getClassLoader() == null);
147        }
148
149        String charsetName() {
150            if (cs instanceof HistoricallyNamedCharset)
151                return ((HistoricallyNamedCharset)cs).historicalName();
152            return cs.name();
153        }
154
155        final String requestedCharsetName() {
156            return requestedCharsetName;
157        }
158
159        char[] decode(byte[] ba, int off, int len) {
160            int en = scale(len, cd.maxCharsPerByte());
161            char[] ca = new char[en];
162            if (len == 0)
163                return ca;
164            if (cd instanceof ArrayDecoder) {
165                int clen = ((ArrayDecoder)cd).decode(ba, off, len, ca);
166                return safeTrim(ca, clen, cs, isTrusted);
167            } else {
168                cd.reset();
169                ByteBuffer bb = ByteBuffer.wrap(ba, off, len);
170                CharBuffer cb = CharBuffer.wrap(ca);
171                try {
172                    CoderResult cr = cd.decode(bb, cb, true);
173                    if (!cr.isUnderflow())
174                        cr.throwException();
175                    cr = cd.flush(cb);
176                    if (!cr.isUnderflow())
177                        cr.throwException();
178                } catch (CharacterCodingException x) {
179                    // Substitution is always enabled,
180                    // so this shouldn't happen
181                    throw new Error(x);
182                }
183                return safeTrim(ca, cb.position(), cs, isTrusted);
184            }
185        }
186    }
187
188    static char[] decode(String charsetName, byte[] ba, int off, int len)
189        throws UnsupportedEncodingException
190    {
191        StringDecoder sd = deref(decoder);
192        String csn = (charsetName == null) ? "ISO-8859-1" : charsetName;
193        if ((sd == null) || !(csn.equals(sd.requestedCharsetName())
194                              || csn.equals(sd.charsetName()))) {
195            sd = null;
196            try {
197                Charset cs = lookupCharset(csn);
198                if (cs != null)
199                    sd = new StringDecoder(cs, csn);
200            } catch (IllegalCharsetNameException x) {}
201            if (sd == null)
202                throw new UnsupportedEncodingException(csn);
203            set(decoder, sd);
204        }
205        return sd.decode(ba, off, len);
206    }
207
208    static char[] decode(Charset cs, byte[] ba, int off, int len) {
209        // (1)We never cache the "external" cs, the only benefit of creating
210        // an additional StringDe/Encoder object to wrap it is to share the
211        // de/encode() method. These SD/E objects are short-lifed, the young-gen
212        // gc should be able to take care of them well. But the best approash
213        // is still not to generate them if not really necessary.
214        // (2)The defensive copy of the input byte/char[] has a big performance
215        // impact, as well as the outgoing result byte/char[]. Need to do the
216        // optimization check of (sm==null && classLoader0==null) for both.
217        // (3)getClass().getClassLoader0() is expensive
218        // (4)There might be a timing gap in isTrusted setting. getClassLoader0()
219        // is only chcked (and then isTrusted gets set) when (SM==null). It is
220        // possible that the SM==null for now but then SM is NOT null later
221        // when safeTrim() is invoked...the "safe" way to do is to redundant
222        // check (... && (isTrusted || SM == null || getClassLoader0())) in trim
223        // but it then can be argued that the SM is null when the opertaion
224        // is started...
225        CharsetDecoder cd = cs.newDecoder();
226        int en = scale(len, cd.maxCharsPerByte());
227        char[] ca = new char[en];
228        if (len == 0)
229            return ca;
230        boolean isTrusted = false;
231        if (System.getSecurityManager() != null) {
232            if (!(isTrusted = (cs.getClass().getClassLoader() == null))) {
233                ba =  Arrays.copyOfRange(ba, off, off + len);
234                off = 0;
235            }
236        }
237        cd.onMalformedInput(CodingErrorAction.REPLACE)
238          .onUnmappableCharacter(CodingErrorAction.REPLACE)
239          .reset();
240        if (cd instanceof ArrayDecoder) {
241            int clen = ((ArrayDecoder)cd).decode(ba, off, len, ca);
242            return safeTrim(ca, clen, cs, isTrusted);
243        } else {
244            ByteBuffer bb = ByteBuffer.wrap(ba, off, len);
245            CharBuffer cb = CharBuffer.wrap(ca);
246            try {
247                CoderResult cr = cd.decode(bb, cb, true);
248                if (!cr.isUnderflow())
249                    cr.throwException();
250                cr = cd.flush(cb);
251                if (!cr.isUnderflow())
252                    cr.throwException();
253            } catch (CharacterCodingException x) {
254                // Substitution is always enabled,
255                // so this shouldn't happen
256                throw new Error(x);
257            }
258            return safeTrim(ca, cb.position(), cs, isTrusted);
259        }
260    }
261
262    static char[] decode(byte[] ba, int off, int len) {
263        String csn = Charset.defaultCharset().name();
264        try {
265            // use charset name decode() variant which provides caching.
266            return decode(csn, ba, off, len);
267        } catch (UnsupportedEncodingException x) {
268            warnUnsupportedCharset(csn);
269        }
270        try {
271            return decode("ISO-8859-1", ba, off, len);
272        } catch (UnsupportedEncodingException x) {
273            // If this code is hit during VM initialization, MessageUtils is
274            // the only way we will be able to get any kind of error message.
275            MessageUtils.err("ISO-8859-1 charset not available: "
276                             + x.toString());
277            // If we can not find ISO-8859-1 (a required encoding) then things
278            // are seriously wrong with the installation.
279            System.exit(1);
280            return null;
281        }
282    }
283
284    // -- Encoding --
285    private static class StringEncoder {
286        private Charset cs;
287        private CharsetEncoder ce;
288        private final String requestedCharsetName;
289        private final boolean isTrusted;
290
291        private StringEncoder(Charset cs, String rcn) {
292            this.requestedCharsetName = rcn;
293            this.cs = cs;
294            this.ce = cs.newEncoder()
295                .onMalformedInput(CodingErrorAction.REPLACE)
296                .onUnmappableCharacter(CodingErrorAction.REPLACE);
297            this.isTrusted = (cs.getClass().getClassLoader() == null);
298        }
299
300        String charsetName() {
301            if (cs instanceof HistoricallyNamedCharset)
302                return ((HistoricallyNamedCharset)cs).historicalName();
303            return cs.name();
304        }
305
306        final String requestedCharsetName() {
307            return requestedCharsetName;
308        }
309
310        byte[] encode(char[] ca, int off, int len) {
311            int en = scale(len, ce.maxBytesPerChar());
312            if (len == 0)
313                return new byte[0];
314            if (ce instanceof ArrayEncoder) {
315                byte[] ba = new byte[en];
316                int blen = ((ArrayEncoder)ce).encode(ca, off, len, ba);
317                return safeTrim(ba, blen, cs, isTrusted);
318            } else {
319                ce.reset();
320                CharBuffer cb = CharBuffer.wrap(ca, off, len);
321                try {
322                    /* ----- BEGIN android -----
323                    CoderResult cr = ce.encode(cb, bb, true);
324                    Pass read-only buffer, so the encoder can't alter it */
325                    ByteBuffer bb = ce.encode(cb.asReadOnlyBuffer());
326                    return safeTrim(bb.array(), bb.limit(), cs, isTrusted);
327                } catch (CharacterCodingException x) {
328                    // Substitution is always enabled,
329                    // so this shouldn't happen
330                    throw new Error(x);
331                }
332            }
333        }
334    }
335
336    static byte[] encode(String charsetName, char[] ca, int off, int len)
337        throws UnsupportedEncodingException
338    {
339        StringEncoder se = deref(encoder);
340        String csn = (charsetName == null) ? "ISO-8859-1" : charsetName;
341        if ((se == null) || !(csn.equals(se.requestedCharsetName())
342                              || csn.equals(se.charsetName()))) {
343            se = null;
344            try {
345                Charset cs = lookupCharset(csn);
346                if (cs != null)
347                    se = new StringEncoder(cs, csn);
348            } catch (IllegalCharsetNameException x) {}
349            if (se == null)
350                throw new UnsupportedEncodingException (csn);
351            set(encoder, se);
352        }
353        return se.encode(ca, off, len);
354    }
355
356    static byte[] encode(Charset cs, char[] ca, int off, int len) {
357        CharsetEncoder ce = cs.newEncoder();
358        int en = scale(len, ce.maxBytesPerChar());
359        byte[] ba = new byte[en];
360        if (len == 0)
361            return ba;
362        boolean isTrusted = false;
363        if (System.getSecurityManager() != null) {
364            if (!(isTrusted = (cs.getClass().getClassLoader() == null))) {
365                ca =  Arrays.copyOfRange(ca, off, off + len);
366                off = 0;
367            }
368        }
369        ce.onMalformedInput(CodingErrorAction.REPLACE)
370          .onUnmappableCharacter(CodingErrorAction.REPLACE)
371          .reset();
372        if (ce instanceof ArrayEncoder) {
373            int blen = ((ArrayEncoder)ce).encode(ca, off, len, ba);
374            return safeTrim(ba, blen, cs, isTrusted);
375        } else {
376            ByteBuffer bb = ByteBuffer.wrap(ba);
377            CharBuffer cb = CharBuffer.wrap(ca, off, len);
378            try {
379                /* ----- BEGIN android -----
380                   CoderResult cr = ce.encode(cb, bb, true);
381                   Pass read-only buffer, so the encoder can't alter it */
382                CoderResult cr = ce.encode(cb.asReadOnlyBuffer(), bb, true);
383                if (!cr.isUnderflow())
384                    cr.throwException();
385                cr = ce.flush(bb);
386                if (!cr.isUnderflow())
387                    cr.throwException();
388            } catch (CharacterCodingException x) {
389                throw new Error(x);
390            }
391            return safeTrim(ba, bb.position(), cs, isTrusted);
392        }
393    }
394
395    static byte[] encode(Charset cs, String str) {
396        ByteBuffer buffer = cs.encode(str);
397        byte[] bytes = new byte[buffer.limit()];
398        buffer.get(bytes);
399        return bytes;
400    }
401
402    static byte[] encode(char[] ca, int off, int len) {
403        String csn = Charset.defaultCharset().name();
404        try {
405            // use charset name encode() variant which provides caching.
406            return encode(csn, ca, off, len);
407        } catch (UnsupportedEncodingException x) {
408            warnUnsupportedCharset(csn);
409        }
410        try {
411            return encode("ISO-8859-1", ca, off, len);
412        } catch (UnsupportedEncodingException x) {
413            // If this code is hit during VM initialization, MessageUtils is
414            // the only way we will be able to get any kind of error message.
415            MessageUtils.err("ISO-8859-1 charset not available: "
416                             + x.toString());
417            // If we can not find ISO-8859-1 (a required encoding) then things
418            // are seriously wrong with the installation.
419            System.exit(1);
420            return null;
421        }
422    }
423}
424