// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

package org.chromium.net;

import org.chromium.base.CalledByNative;
import org.chromium.base.JNINamespace;

import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;
import java.text.Normalizer;

/**
 * String conversion helpers used in place of icu when the build does not
 * include it.
 */
@JNINamespace("net::android")
public class NetStringUtil {
    /**
     * Strictly decodes bytes in the given character set into a Unicode
     * string. Nothing is substituted: any failure yields null.
     * @param text ByteBuffer holding the encoded bytes to convert.
     * @param charsetName Name of the character set the bytes are encoded in.
     * @return The decoded string on success, null on failure (for example an
     *         unrecognized character set or invalid input).
     */
    @CalledByNative
    private static String convertToUnicode(ByteBuffer text, String charsetName) {
        try {
            // The decoder is left at its default error actions, so invalid
            // characters make decode() throw instead of being replaced.
            return Charset.forName(charsetName).newDecoder().decode(text).toString();
        } catch (Exception e) {
            // Every failure is reported to the caller as null.
            return null;
        }
    }

    /**
     * Strictly decodes bytes in the given character set into a Unicode
     * string and then normalizes the result to NFC.
     * @param text ByteBuffer holding the encoded bytes to convert.
     * @param charsetName Name of the character set the bytes are encoded in.
     * @return The normalized string on success, null if decoding failed.
     */
    @CalledByNative
    private static String convertToUnicodeAndNormalize(ByteBuffer text, String charsetName) {
        String decoded = convertToUnicode(text, charsetName);
        return decoded == null ? null : Normalizer.normalize(decoded, Normalizer.Form.NFC);
    }

    /**
     * Leniently decodes bytes in the given character set into a Unicode
     * string, replacing malformed or unmappable input with U+FFFD.
     * @param text ByteBuffer holding the encoded bytes to convert.
     * @param charsetName Name of the character set the bytes are encoded in.
     * @return The decoded string on success, null if the character set is
     *         not recognized or decoding otherwise fails.
     */
    @CalledByNative
    private static String convertToUnicodeWithSubstitutions(
            ByteBuffer text, String charsetName) {
        try {
            // TODO(mmenke): Investigate if Charset.decode() can be used
            //               instead. The question is whether it uses the
            //               proper replace character. JDK CharsetDecoder docs
            //               say U+FFFD is the default, but Charset.decode()
            //               docs say it uses the "charset's default
            //               replacement byte array".
            CharsetDecoder substitutingDecoder = Charset.forName(charsetName)
                    .newDecoder()
                    .onMalformedInput(CodingErrorAction.REPLACE)
                    .onUnmappableCharacter(CodingErrorAction.REPLACE)
                    .replaceWith("\uFFFD");
            return substitutingDecoder.decode(text).toString();
        } catch (Exception e) {
            // Every failure is reported to the caller as null.
            return null;
        }
    }
}