StringCoding.java revision 89ca22d995205228f526db2a3c50e99e866fe9cf
1/* 2 * Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26package java.lang; 27 28import java.io.UnsupportedEncodingException; 29import java.lang.ref.SoftReference; 30import java.nio.ByteBuffer; 31import java.nio.CharBuffer; 32import java.nio.charset.Charset; 33import java.nio.charset.CharsetDecoder; 34import java.nio.charset.CharsetEncoder; 35import java.nio.charset.CharacterCodingException; 36import java.nio.charset.CoderResult; 37import java.nio.charset.CodingErrorAction; 38import java.nio.charset.IllegalCharsetNameException; 39import java.nio.charset.UnsupportedCharsetException; 40import java.util.Arrays; 41import sun.misc.MessageUtils; 42import sun.nio.cs.HistoricallyNamedCharset; 43import sun.nio.cs.ArrayDecoder; 44import sun.nio.cs.ArrayEncoder; 45 46/** 47 * Utility class for string encoding and decoding. 48 */ 49 50class StringCoding { 51 52 private StringCoding() { } 53 54 /** The cached coders for each thread */ 55 private final static ThreadLocal<SoftReference<StringDecoder>> decoder = 56 new ThreadLocal<>(); 57 private final static ThreadLocal<SoftReference<StringEncoder>> encoder = 58 new ThreadLocal<>(); 59 60 private static boolean warnUnsupportedCharset = true; 61 62 private static <T> T deref(ThreadLocal<SoftReference<T>> tl) { 63 SoftReference<T> sr = tl.get(); 64 if (sr == null) 65 return null; 66 return sr.get(); 67 } 68 69 private static <T> void set(ThreadLocal<SoftReference<T>> tl, T ob) { 70 tl.set(new SoftReference<T>(ob)); 71 } 72 73 // Trim the given byte array to the given length 74 // 75 private static byte[] safeTrim(byte[] ba, int len, Charset cs, boolean isTrusted) { 76 77 /* ----- BEGIN android ----- 78 if (len == ba.length && (isTrusted || System.getSecurityManager() == null)) 79 // Libcore tests expect a defensive copy in pretty much all cases. 80 // + System.getSecurityManager() == null is always true on android 81 */ 82 if (len == ba.length && (isTrusted)) 83 return ba; 84 else 85 return Arrays.copyOf(ba, len); 86 } 87 88 // Trim the given char array to the given length 89 // 90 private static char[] safeTrim(char[] ca, int len, 91 Charset cs, boolean isTrusted) { 92 /* ----- BEGIN android ----- 93 if (len == ca.length && (isTrusted || System.getSecurityManager() == null)) 94 // Libcore tests expect a defensive copy in pretty much all cases. 95 // + System.getSecurityManager() == null is always true on android 96 */ 97 if (len == ca.length && (isTrusted)) 98 return ca; 99 else 100 return Arrays.copyOf(ca, len); 101 } 102 103 private static int scale(int len, float expansionFactor) { 104 // We need to perform double, not float, arithmetic; otherwise 105 // we lose low order bits when len is larger than 2**24. 106 return (int)(len * (double)expansionFactor); 107 } 108 109 private static Charset lookupCharset(String csn) { 110 if (Charset.isSupported(csn)) { 111 try { 112 return Charset.forName(csn); 113 } catch (UnsupportedCharsetException x) { 114 throw new Error(x); 115 } 116 } 117 return null; 118 } 119 120 private static void warnUnsupportedCharset(String csn) { 121 if (warnUnsupportedCharset) { 122 // Use sun.misc.MessageUtils rather than the Logging API or 123 // System.err since this method may be called during VM 124 // initialization before either is available. 125 MessageUtils.err("WARNING: Default charset " + csn + 126 " not supported, using ISO-8859-1 instead"); 127 warnUnsupportedCharset = false; 128 } 129 } 130 131 132 // -- Decoding -- 133 private static class StringDecoder { 134 private final String requestedCharsetName; 135 private final Charset cs; 136 private final CharsetDecoder cd; 137 private final boolean isTrusted; 138 139 private StringDecoder(Charset cs, String rcn) { 140 this.requestedCharsetName = rcn; 141 this.cs = cs; 142 this.cd = cs.newDecoder() 143 .onMalformedInput(CodingErrorAction.REPLACE) 144 .onUnmappableCharacter(CodingErrorAction.REPLACE); 145 this.isTrusted = (cs.getClass().getClassLoader() == null); 146 } 147 148 String charsetName() { 149 if (cs instanceof HistoricallyNamedCharset) 150 return ((HistoricallyNamedCharset)cs).historicalName(); 151 return cs.name(); 152 } 153 154 final String requestedCharsetName() { 155 return requestedCharsetName; 156 } 157 158 char[] decode(byte[] ba, int off, int len) { 159 int en = scale(len, cd.maxCharsPerByte()); 160 char[] ca = new char[en]; 161 if (len == 0) 162 return ca; 163 if (cd instanceof ArrayDecoder) { 164 int clen = ((ArrayDecoder)cd).decode(ba, off, len, ca); 165 return safeTrim(ca, clen, cs, isTrusted); 166 } else { 167 cd.reset(); 168 ByteBuffer bb = ByteBuffer.wrap(ba, off, len); 169 CharBuffer cb = CharBuffer.wrap(ca); 170 try { 171 CoderResult cr = cd.decode(bb, cb, true); 172 if (!cr.isUnderflow()) 173 cr.throwException(); 174 cr = cd.flush(cb); 175 if (!cr.isUnderflow()) 176 cr.throwException(); 177 } catch (CharacterCodingException x) { 178 // Substitution is always enabled, 179 // so this shouldn't happen 180 throw new Error(x); 181 } 182 return safeTrim(ca, cb.position(), cs, isTrusted); 183 } 184 } 185 } 186 187 static char[] decode(String charsetName, byte[] ba, int off, int len) 188 throws UnsupportedEncodingException 189 { 190 StringDecoder sd = deref(decoder); 191 String csn = (charsetName == null) ? "ISO-8859-1" : charsetName; 192 if ((sd == null) || !(csn.equals(sd.requestedCharsetName()) 193 || csn.equals(sd.charsetName()))) { 194 sd = null; 195 try { 196 Charset cs = lookupCharset(csn); 197 if (cs != null) 198 sd = new StringDecoder(cs, csn); 199 } catch (IllegalCharsetNameException x) {} 200 if (sd == null) 201 throw new UnsupportedEncodingException(csn); 202 set(decoder, sd); 203 } 204 return sd.decode(ba, off, len); 205 } 206 207 static char[] decode(Charset cs, byte[] ba, int off, int len) { 208 // (1)We never cache the "external" cs, the only benefit of creating 209 // an additional StringDe/Encoder object to wrap it is to share the 210 // de/encode() method. These SD/E objects are short-lifed, the young-gen 211 // gc should be able to take care of them well. But the best approash 212 // is still not to generate them if not really necessary. 213 // (2)The defensive copy of the input byte/char[] has a big performance 214 // impact, as well as the outgoing result byte/char[]. Need to do the 215 // optimization check of (sm==null && classLoader0==null) for both. 216 // (3)getClass().getClassLoader0() is expensive 217 // (4)There might be a timing gap in isTrusted setting. getClassLoader0() 218 // is only chcked (and then isTrusted gets set) when (SM==null). It is 219 // possible that the SM==null for now but then SM is NOT null later 220 // when safeTrim() is invoked...the "safe" way to do is to redundant 221 // check (... && (isTrusted || SM == null || getClassLoader0())) in trim 222 // but it then can be argued that the SM is null when the opertaion 223 // is started... 224 CharsetDecoder cd = cs.newDecoder(); 225 int en = scale(len, cd.maxCharsPerByte()); 226 char[] ca = new char[en]; 227 if (len == 0) 228 return ca; 229 boolean isTrusted = false; 230 if (System.getSecurityManager() != null) { 231 if (!(isTrusted = (cs.getClass().getClassLoader() == null))) { 232 ba = Arrays.copyOfRange(ba, off, off + len); 233 off = 0; 234 } 235 } 236 cd.onMalformedInput(CodingErrorAction.REPLACE) 237 .onUnmappableCharacter(CodingErrorAction.REPLACE) 238 .reset(); 239 if (cd instanceof ArrayDecoder) { 240 int clen = ((ArrayDecoder)cd).decode(ba, off, len, ca); 241 return safeTrim(ca, clen, cs, isTrusted); 242 } else { 243 ByteBuffer bb = ByteBuffer.wrap(ba, off, len); 244 CharBuffer cb = CharBuffer.wrap(ca); 245 try { 246 CoderResult cr = cd.decode(bb, cb, true); 247 if (!cr.isUnderflow()) 248 cr.throwException(); 249 cr = cd.flush(cb); 250 if (!cr.isUnderflow()) 251 cr.throwException(); 252 } catch (CharacterCodingException x) { 253 // Substitution is always enabled, 254 // so this shouldn't happen 255 throw new Error(x); 256 } 257 return safeTrim(ca, cb.position(), cs, isTrusted); 258 } 259 } 260 261 static char[] decode(byte[] ba, int off, int len) { 262 String csn = Charset.defaultCharset().name(); 263 try { 264 // use charset name decode() variant which provides caching. 265 return decode(csn, ba, off, len); 266 } catch (UnsupportedEncodingException x) { 267 warnUnsupportedCharset(csn); 268 } 269 try { 270 return decode("ISO-8859-1", ba, off, len); 271 } catch (UnsupportedEncodingException x) { 272 // If this code is hit during VM initialization, MessageUtils is 273 // the only way we will be able to get any kind of error message. 274 MessageUtils.err("ISO-8859-1 charset not available: " 275 + x.toString()); 276 // If we can not find ISO-8859-1 (a required encoding) then things 277 // are seriously wrong with the installation. 278 System.exit(1); 279 return null; 280 } 281 } 282 283 // -- Encoding -- 284 private static class StringEncoder { 285 private Charset cs; 286 private CharsetEncoder ce; 287 private final String requestedCharsetName; 288 private final boolean isTrusted; 289 290 private StringEncoder(Charset cs, String rcn) { 291 this.requestedCharsetName = rcn; 292 this.cs = cs; 293 this.ce = cs.newEncoder() 294 .onMalformedInput(CodingErrorAction.REPLACE) 295 .onUnmappableCharacter(CodingErrorAction.REPLACE); 296 this.isTrusted = (cs.getClass().getClassLoader() == null); 297 } 298 299 String charsetName() { 300 if (cs instanceof HistoricallyNamedCharset) 301 return ((HistoricallyNamedCharset)cs).historicalName(); 302 return cs.name(); 303 } 304 305 final String requestedCharsetName() { 306 return requestedCharsetName; 307 } 308 309 byte[] encode(char[] ca, int off, int len) { 310 int en = scale(len, ce.maxBytesPerChar()); 311 byte[] ba = new byte[en]; 312 if (len == 0) 313 return ba; 314 if (ce instanceof ArrayEncoder) { 315 int blen = ((ArrayEncoder)ce).encode(ca, off, len, ba); 316 return safeTrim(ba, blen, cs, isTrusted); 317 } else { 318 ce.reset(); 319 ByteBuffer bb = ByteBuffer.wrap(ba); 320 CharBuffer cb = CharBuffer.wrap(ca, off, len); 321 try { 322 /* ----- BEGIN android ----- 323 CoderResult cr = ce.encode(cb, bb, true); 324 Pass read-only buffer, so the encoder can't alter it */ 325 CoderResult cr = ce.encode(cb.asReadOnlyBuffer(), bb, true); 326 if (!cr.isUnderflow()) 327 cr.throwException(); 328 cr = ce.flush(bb); 329 if (!cr.isUnderflow()) 330 cr.throwException(); 331 } catch (CharacterCodingException x) { 332 // Substitution is always enabled, 333 // so this shouldn't happen 334 throw new Error(x); 335 } 336 return safeTrim(ba, bb.position(), cs, isTrusted); 337 } 338 } 339 } 340 341 static byte[] encode(String charsetName, char[] ca, int off, int len) 342 throws UnsupportedEncodingException 343 { 344 StringEncoder se = deref(encoder); 345 String csn = (charsetName == null) ? "ISO-8859-1" : charsetName; 346 if ((se == null) || !(csn.equals(se.requestedCharsetName()) 347 || csn.equals(se.charsetName()))) { 348 se = null; 349 try { 350 Charset cs = lookupCharset(csn); 351 if (cs != null) 352 se = new StringEncoder(cs, csn); 353 } catch (IllegalCharsetNameException x) {} 354 if (se == null) 355 throw new UnsupportedEncodingException (csn); 356 set(encoder, se); 357 } 358 return se.encode(ca, off, len); 359 } 360 361 static byte[] encode(Charset cs, char[] ca, int off, int len) { 362 CharsetEncoder ce = cs.newEncoder(); 363 int en = scale(len, ce.maxBytesPerChar()); 364 byte[] ba = new byte[en]; 365 if (len == 0) 366 return ba; 367 boolean isTrusted = false; 368 if (System.getSecurityManager() != null) { 369 if (!(isTrusted = (cs.getClass().getClassLoader() == null))) { 370 ca = Arrays.copyOfRange(ca, off, off + len); 371 off = 0; 372 } 373 } 374 ce.onMalformedInput(CodingErrorAction.REPLACE) 375 .onUnmappableCharacter(CodingErrorAction.REPLACE) 376 .reset(); 377 if (ce instanceof ArrayEncoder) { 378 int blen = ((ArrayEncoder)ce).encode(ca, off, len, ba); 379 return safeTrim(ba, blen, cs, isTrusted); 380 } else { 381 ByteBuffer bb = ByteBuffer.wrap(ba); 382 CharBuffer cb = CharBuffer.wrap(ca, off, len); 383 try { 384 /* ----- BEGIN android ----- 385 CoderResult cr = ce.encode(cb, bb, true); 386 Pass read-only buffer, so the encoder can't alter it */ 387 CoderResult cr = ce.encode(cb.asReadOnlyBuffer(), bb, true); 388 if (!cr.isUnderflow()) 389 cr.throwException(); 390 cr = ce.flush(bb); 391 if (!cr.isUnderflow()) 392 cr.throwException(); 393 } catch (CharacterCodingException x) { 394 throw new Error(x); 395 } 396 return safeTrim(ba, bb.position(), cs, isTrusted); 397 } 398 } 399 400 static byte[] encode(char[] ca, int off, int len) { 401 String csn = Charset.defaultCharset().name(); 402 try { 403 // use charset name encode() variant which provides caching. 404 return encode(csn, ca, off, len); 405 } catch (UnsupportedEncodingException x) { 406 warnUnsupportedCharset(csn); 407 } 408 try { 409 return encode("ISO-8859-1", ca, off, len); 410 } catch (UnsupportedEncodingException x) { 411 // If this code is hit during VM initialization, MessageUtils is 412 // the only way we will be able to get any kind of error message. 413 MessageUtils.err("ISO-8859-1 charset not available: " 414 + x.toString()); 415 // If we can not find ISO-8859-1 (a required encoding) then things 416 // are seriously wrong with the installation. 417 System.exit(1); 418 return null; 419 } 420 } 421} 422