1/*
2 * Copyright (C) 2010 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package java.lang;
18
19import java.util.Locale;
20
21/**
22 * Performs case operations as described by http://unicode.org/reports/tr21/tr21-5.html.
23 */
24class CaseMapper {
25    // Intention-revealing constants for various important characters.
26    private static final char LATIN_CAPITAL_I = 'I';
27    private static final char LATIN_SMALL_I = 'i';
28    private static final char LATIN_CAPITAL_I_WITH_DOT = '\u0130';
29    private static final char LATIN_SMALL_DOTLESS_I = '\u0131';
30    private static final char COMBINING_DOT_ABOVE = '\u0307';
31    private static final char GREEK_CAPITAL_SIGMA = '\u03a3';
32    private static final char GREEK_SMALL_FINAL_SIGMA = '\u03c2';
33
34    /**
35     * Our current GC makes short-lived objects more expensive than we'd like. When that's fixed,
36     * this class should be changed so that you instantiate it with the String and its value,
37     * offset, and count fields.
38     */
39    private CaseMapper() {
40    }
41
42    /**
43     * Implements String.toLowerCase. We need 's' so that we can return the original String instance
44     * if nothing changes. We need 'value', 'offset', and 'count' because they're not otherwise
45     * accessible.
46     */
47    public static String toLowerCase(Locale locale, String s, char[] value, int offset, int count) {
48        String languageCode = locale.getLanguage();
49        boolean turkishOrAzeri = languageCode.equals("tr") || languageCode.equals("az");
50
51        char[] newValue = null;
52        int newCount = 0;
53        for (int i = offset, end = offset + count; i < end; ++i) {
54            char ch = value[i];
55            char newCh = ch;
56            if (turkishOrAzeri && ch == LATIN_CAPITAL_I_WITH_DOT) {
57                newCh = LATIN_SMALL_I;
58            } else if (turkishOrAzeri && ch == LATIN_CAPITAL_I && !followedBy(value, offset, count, i, COMBINING_DOT_ABOVE)) {
59                newCh = LATIN_SMALL_DOTLESS_I;
60            } else if (turkishOrAzeri && ch == COMBINING_DOT_ABOVE && precededBy(value, offset, count, i, LATIN_CAPITAL_I)) {
61                continue; // (We've already converted the preceding I, so we don't need to create newValue.)
62            } else if (ch == GREEK_CAPITAL_SIGMA && isFinalSigma(value, offset, count, i)) {
63                newCh = GREEK_SMALL_FINAL_SIGMA;
64            } else {
65                newCh = Character.toLowerCase(ch);
66            }
67            if (newValue == null && ch != newCh) {
68                newValue = new char[count]; // The result can't be longer than the input.
69                newCount = i - offset;
70                System.arraycopy(value, offset, newValue, 0, newCount);
71            }
72            if (newValue != null) {
73                newValue[newCount++] = newCh;
74            }
75        }
76        return newValue != null ? new String(0, newCount, newValue) : s;
77    }
78
79    private static boolean followedBy(char[] value, int offset, int count, int index, char ch) {
80        return index + 1 < offset + count && value[index + 1] == ch;
81    }
82
83    private static boolean precededBy(char[] value, int offset, int count, int index, char ch) {
84        return index > offset && value[index - 1] == ch;
85    }
86
87    /**
88     * True if 'index' is preceded by a sequence consisting of a cased letter and a case-ignorable
89     * sequence, and 'index' is not followed by a sequence consisting of an ignorable sequence and
90     * then a cased letter.
91     */
92    private static boolean isFinalSigma(char[] value, int offset, int count, int index) {
93        // TODO: we don't skip case-ignorable sequences like we should.
94        // TODO: we should add a more direct way to test for a cased letter.
95        if (index <= offset) {
96            return false;
97        }
98        char previous = value[index - 1];
99        if (!(Character.isLowerCase(previous) || Character.isUpperCase(previous) || Character.isTitleCase(previous))) {
100            return false;
101        }
102        if (index + 1 >= offset + count) {
103            return true;
104        }
105        char next = value[index + 1];
106        if (Character.isLowerCase(next) || Character.isUpperCase(next) || Character.isTitleCase(next)) {
107            return false;
108        }
109        return true;
110    }
111}
112