// NetStringUtil.java revision cedac228d2dd51db4b79ea1e72c7f249408ee061
1// Copyright 2014 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5package org.chromium.net; 6 7import org.chromium.base.CalledByNative; 8import org.chromium.base.JNINamespace; 9 10import java.nio.ByteBuffer; 11import java.nio.charset.Charset; 12import java.nio.charset.CharsetDecoder; 13import java.nio.charset.CodingErrorAction; 14import java.text.Normalizer; 15 16/** 17 * Utility functions for converting strings between formats when not built with 18 * icu. 19 */ 20@JNINamespace("net::android") 21public class NetStringUtil { 22 /** 23 * Attempts to convert text in a given character set to a Unicode string. 24 * Returns null on failure. 25 * @param text ByteBuffer containing the character array to convert. 26 * @param charset Character set it's in encoded in. 27 * @return: Unicode string on success, null on failure. 28 */ 29 @CalledByNative 30 private static String convertToUnicode( 31 ByteBuffer text, 32 String charset_name) { 33 try { 34 Charset charset = Charset.forName(charset_name); 35 CharsetDecoder decoder = charset.newDecoder(); 36 // On invalid characters, this will throw an exception. 37 return decoder.decode(text).toString(); 38 } catch (Exception e) { 39 return null; 40 } 41 } 42 43 /** 44 * Attempts to convert text in a given character set to a Unicode string, 45 * and normalize it. Returns null on failure. 46 * @param text ByteBuffer containing the character array to convert. 47 * @param charset Character set it's in encoded in. 48 * @return: Unicode string on success, null on failure. 
49 */ 50 @CalledByNative 51 private static String convertToUnicodeAndNormalize( 52 ByteBuffer text, 53 String charset_name) { 54 String unicodeString = convertToUnicode(text, charset_name); 55 if (unicodeString == null) 56 return unicodeString; 57 return Normalizer.normalize(unicodeString, Normalizer.Form.NFC); 58 } 59 60 /** 61 * Convert text in a given character set to a Unicode string. Any invalid 62 * characters are replaced with U+FFFD. Returns null if the character set 63 * is not recognized. 64 * @param text ByteBuffer containing the character array to convert. 65 * @param charset Character set it's in encoded in. 66 * @return: Unicode string on success, null on failure. 67 */ 68 @CalledByNative 69 private static String convertToUnicodeWithSubstitutions( 70 ByteBuffer text, 71 String charset_name) { 72 try { 73 Charset charset = Charset.forName(charset_name); 74 75 // TODO(mmenke): Investigate if Charset.decode() can be used 76 // instead. The question is whether it uses the proper replace 77 // character. JDK CharsetDecoder docs say U+FFFD is the default, 78 // but Charset.decode() docs say it uses the "charset's default 79 // replacement byte array". 80 CharsetDecoder decoder = charset.newDecoder(); 81 decoder.onMalformedInput(CodingErrorAction.REPLACE); 82 decoder.onUnmappableCharacter(CodingErrorAction.REPLACE); 83 decoder.replaceWith("\uFFFD"); 84 return decoder.decode(text).toString(); 85 } catch (Exception e) { 86 return null; 87 } 88 } 89} 90