/*
 * Copyright (C) 2010 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package java.lang;

import java.util.Locale;

/**
 * Performs case operations as described by http://unicode.org/reports/tr21/tr21-5.html.
 *
 * <p>All mappings here work on individual UTF-16 {@code char} values and never change the
 * number of chars except to drop a COMBINING DOT ABOVE in the Turkish/Azeri case, so the result
 * is never longer than the input.
 */
class CaseMapper {
    // Intention-revealing constants for various important characters.
    private static final char LATIN_CAPITAL_I = 'I';
    private static final char LATIN_SMALL_I = 'i';
    private static final char LATIN_CAPITAL_I_WITH_DOT = '\u0130';
    private static final char LATIN_SMALL_DOTLESS_I = '\u0131';
    private static final char COMBINING_DOT_ABOVE = '\u0307';
    private static final char GREEK_CAPITAL_SIGMA = '\u03a3';
    private static final char GREEK_SMALL_FINAL_SIGMA = '\u03c2';

    /**
     * Our current GC makes short-lived objects more expensive than we'd like. When that's fixed,
     * this class should be changed so that you instantiate it with the String and its value,
     * offset, and count fields.
     */
    private CaseMapper() {
    }

    /**
     * Implements String.toLowerCase. We need 's' so that we can return the original String
     * instance if nothing changes. We need 'value', 'offset', and 'count' because they're not
     * otherwise accessible.
     *
     * <p>Special cases handled before falling back to {@link Character#toLowerCase(char)}:
     * <ul>
     * <li>Turkish ("tr") and Azeri ("az"): U+0130 maps to 'i'; 'I' maps to dotless U+0131 unless
     *     it is immediately followed by U+0307; a U+0307 immediately after 'I' is dropped.
     * <li>All locales: GREEK CAPITAL SIGMA maps to the final form U+03C2 when it is in final
     *     position (see {@link #isFinalSigma}).
     * </ul>
     *
     * @param locale the locale whose language code selects the Turkish/Azeri rules
     * @param s the String being converted; returned as-is when no character changes
     * @param value the backing char array of {@code s}
     * @param offset index in {@code value} of the first char of {@code s}
     * @param count number of chars of {@code s} in {@code value}
     * @return {@code s} itself if lowercasing changes nothing, otherwise a new String
     */
    public static String toLowerCase(Locale locale, String s, char[] value, int offset, int count) {
        String languageCode = locale.getLanguage();
        boolean turkishOrAzeri = languageCode.equals("tr") || languageCode.equals("az");

        // Allocated lazily, the first time an output char differs from the input.
        char[] newValue = null;
        int newCount = 0;
        for (int i = offset, end = offset + count; i < end; ++i) {
            char ch = value[i];
            char newCh = ch;
            if (turkishOrAzeri && ch == LATIN_CAPITAL_I_WITH_DOT) {
                newCh = LATIN_SMALL_I;
            } else if (turkishOrAzeri && ch == LATIN_CAPITAL_I
                    && !followedBy(value, offset, count, i, COMBINING_DOT_ABOVE)) {
                newCh = LATIN_SMALL_DOTLESS_I;
            } else if (turkishOrAzeri && ch == COMBINING_DOT_ABOVE
                    && precededBy(value, offset, count, i, LATIN_CAPITAL_I)) {
                continue; // (We've already converted the preceding I, so we don't need to create newValue.)
            } else if (ch == GREEK_CAPITAL_SIGMA && isFinalSigma(value, offset, count, i)) {
                newCh = GREEK_SMALL_FINAL_SIGMA;
            } else {
                newCh = Character.toLowerCase(ch);
            }
            if (newValue == null && ch != newCh) {
                newValue = new char[count]; // The result can't be longer than the input.
                newCount = i - offset;
                // Everything before this point was unchanged, so copy it verbatim.
                System.arraycopy(value, offset, newValue, 0, newCount);
            }
            if (newValue != null) {
                newValue[newCount++] = newCh;
            }
        }
        return newValue != null ? new String(0, newCount, newValue) : s;
    }

    /**
     * True if the char immediately after 'index' lies inside [offset, offset + count) and
     * equals 'ch'.
     */
    private static boolean followedBy(char[] value, int offset, int count, int index, char ch) {
        return index + 1 < offset + count && value[index + 1] == ch;
    }

    /**
     * True if the char immediately before 'index' lies at or after 'offset' and equals 'ch'.
     */
    private static boolean precededBy(char[] value, int offset, int count, int index, char ch) {
        return index > offset && value[index - 1] == ch;
    }

    /**
     * True if 'index' is preceded by a sequence consisting of a cased letter and a case-ignorable
     * sequence, and 'index' is not followed by a sequence consisting of an ignorable sequence and
     * then a cased letter.
     *
     * <p>Case-ignorable chars are skipped in both directions before looking for the cased
     * letter; a char that is both cased and case-ignorable is treated as ignorable during the
     * scan. Only chars inside [offset, offset + count) are examined.
     */
    private static boolean isFinalSigma(char[] value, int offset, int count, int index) {
        // Look backwards past any case-ignorable chars; a cased letter must be there.
        int before = index - 1;
        while (before >= offset && isCaseIgnorable(value[before])) {
            --before;
        }
        if (before < offset || !isCased(value[before])) {
            return false;
        }

        // Look forwards past any case-ignorable chars; a cased letter must NOT be there.
        int end = offset + count;
        int after = index + 1;
        while (after < end && isCaseIgnorable(value[after])) {
            ++after;
        }
        return after >= end || !isCased(value[after]);
    }

    /**
     * True if 'ch' is a cased letter: lowercase, uppercase, or titlecase.
     */
    private static boolean isCased(char ch) {
        // TODO: we should add a more direct way to test for a cased letter.
        return Character.isLowerCase(ch) || Character.isUpperCase(ch) || Character.isTitleCase(ch);
    }

    /**
     * True if 'ch' is case-ignorable in the sense of the Unicode default case algorithms:
     * general category Mn, Me, Cf, Lm, or Sk, or Word_Break of MidLetter, MidNumLet, or
     * Single_Quote.
     *
     * <p>java.lang.Character does not expose Word_Break, so those characters are listed
     * explicitly; the list may need revisiting when the Unicode version changes.
     */
    private static boolean isCaseIgnorable(char ch) {
        switch (Character.getType(ch)) {
        case Character.NON_SPACING_MARK:  // Mn
        case Character.ENCLOSING_MARK:    // Me
        case Character.FORMAT:            // Cf
        case Character.MODIFIER_LETTER:   // Lm
        case Character.MODIFIER_SYMBOL:   // Sk
            return true;
        default:
            break;
        }
        switch (ch) {
        case '\'':      // U+0027 APOSTROPHE (Single_Quote)
        case '.':       // U+002E FULL STOP (MidNumLet)
        case ':':       // U+003A COLON (MidLetter)
        case '\u00b7':  // MIDDLE DOT (MidLetter)
        case '\u0387':  // GREEK ANO TELEIA (MidLetter)
        case '\u05f4':  // HEBREW PUNCTUATION GERSHAYIM (MidLetter)
        case '\u2018':  // LEFT SINGLE QUOTATION MARK (MidNumLet)
        case '\u2019':  // RIGHT SINGLE QUOTATION MARK (MidNumLet)
        case '\u2024':  // ONE DOT LEADER (MidNumLet)
        case '\u2027':  // HYPHENATION POINT (MidLetter)
        case '\ufe13':  // PRESENTATION FORM FOR VERTICAL COLON (MidLetter)
        case '\ufe52':  // SMALL FULL STOP (MidNumLet)
        case '\ufe55':  // SMALL COLON (MidLetter)
        case '\uff07':  // FULLWIDTH APOSTROPHE (MidNumLet)
        case '\uff0e':  // FULLWIDTH FULL STOP (MidNumLet)
        case '\uff1a':  // FULLWIDTH COLON (MidLetter)
            return true;
        default:
            return false;
        }
    }
}