1// Copyright 2014 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5package org.chromium.net;
6
7import org.chromium.base.CalledByNative;
8import org.chromium.base.JNINamespace;
9
10import java.nio.ByteBuffer;
11import java.nio.charset.Charset;
12import java.nio.charset.CharsetDecoder;
13import java.nio.charset.CodingErrorAction;
14import java.text.Normalizer;
15
16/**
17 * Utility functions for converting strings between formats when not built with
18 * icu.
19 */
20@JNINamespace("net::android")
21public class NetStringUtil {
22    /**
23     * Attempts to convert text in a given character set to a Unicode string.
24     * Returns null on failure.
25     * @param text ByteBuffer containing the character array to convert.
26     * @param charsetName Character set it's in encoded in.
27     * @return: Unicode string on success, null on failure.
28     */
29    @CalledByNative
30    private static String convertToUnicode(
31            ByteBuffer text,
32            String charsetName) {
33        try {
34            Charset charset = Charset.forName(charsetName);
35            CharsetDecoder decoder = charset.newDecoder();
36            // On invalid characters, this will throw an exception.
37            return decoder.decode(text).toString();
38        } catch (Exception e) {
39            return null;
40        }
41    }
42
43    /**
44     * Attempts to convert text in a given character set to a Unicode string,
45     * and normalize it.  Returns null on failure.
46     * @param text ByteBuffer containing the character array to convert.
47     * @param charsetName Character set it's in encoded in.
48     * @return: Unicode string on success, null on failure.
49     */
50    @CalledByNative
51    private static String convertToUnicodeAndNormalize(
52            ByteBuffer text,
53            String charsetName) {
54        String unicodeString = convertToUnicode(text, charsetName);
55        if (unicodeString == null) return null;
56        return Normalizer.normalize(unicodeString, Normalizer.Form.NFC);
57    }
58
59    /**
60     * Convert text in a given character set to a Unicode string.  Any invalid
61     * characters are replaced with U+FFFD.  Returns null if the character set
62     * is not recognized.
63     * @param text ByteBuffer containing the character array to convert.
64     * @param charsetName Character set it's in encoded in.
65     * @return: Unicode string on success, null on failure.
66     */
67    @CalledByNative
68    private static String convertToUnicodeWithSubstitutions(
69            ByteBuffer text,
70            String charsetName) {
71        try {
72            Charset charset = Charset.forName(charsetName);
73
74            // TODO(mmenke):  Investigate if Charset.decode() can be used
75            // instead.  The question is whether it uses the proper replace
76            // character.  JDK CharsetDecoder docs say U+FFFD is the default,
77            // but Charset.decode() docs say it uses the "charset's default
78            // replacement byte array".
79            CharsetDecoder decoder = charset.newDecoder();
80            decoder.onMalformedInput(CodingErrorAction.REPLACE);
81            decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
82            decoder.replaceWith("\uFFFD");
83            return decoder.decode(text).toString();
84        } catch (Exception e) {
85            return null;
86        }
87    }
88}
89