/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package android.text;

import android.annotation.Nullable;
import android.util.Log;

import com.android.internal.annotations.GuardedBy;

import java.io.File;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.util.HashMap;
import java.util.Locale;

/**
 * Hyphenator is a wrapper class for a native implementation of automatic hyphenation,
 * in essence finding valid hyphenation opportunities in a word.
 *
 * <p>Instances are cached per {@link Locale} in a static map that is populated by
 * {@link #init()} and lazily extended by {@link #get(Locale)}. All access to that map made by
 * this class happens while holding {@code sLock}.
 *
 * @hide
 */
public class Hyphenator {
    // This class has deliberately simple lifetime management (no finalizer) because in
    // the common case a process will use a very small number of locales.

    private static final String TAG = "Hyphenator";

    private static final Object sLock = new Object();

    // Cache from locale to hyphenator. Besides the entries loaded from disk it also holds
    // aliases (several locales sharing one Hyphenator) and negative results, which are
    // recorded by mapping the locale to sEmptyHyphenator.
    @GuardedBy("sLock")
    static final HashMap<Locale, Hyphenator> sMap = new HashMap<>();

    // Shared hyphenator built from a null pattern buffer; returned whenever no pattern data
    // can be found for a locale.
    static final Hyphenator sEmptyHyphenator =
            new Hyphenator(StaticLayout.nLoadHyphenator(null, 0), null);

    private final long mNativePtr;

    // We retain a reference to the buffer to keep the memory mapping valid
    @SuppressWarnings("unused")
    private final ByteBuffer mBuffer;

    /**
     * @param nativePtr handle returned by {@code StaticLayout.nLoadHyphenator}
     * @param b buffer holding the pattern data the native object was created from, or
     *          {@code null} for the empty hyphenator
     */
    private Hyphenator(long nativePtr, ByteBuffer b) {
        mNativePtr = nativePtr;
        mBuffer = b;
    }

    /**
     * Returns the native handle this object wraps.
     */
    public long getNativePtr() {
        return mNativePtr;
    }

    /**
     * Returns the hyphenator to use for {@code locale}.
     *
     * <p>The lookup tries, in order: the exact locale, language+variant, language only, and
     * finally the script-only locale ({@code und-<Script>}). The first hit is cached under the
     * requested locale so that later lookups succeed immediately. If nothing matches, the
     * empty hyphenator is cached and returned.
     *
     * @param locale the locale to look up; {@code null} yields the empty hyphenator
     * @return a hyphenator, never {@code null}
     */
    public static Hyphenator get(@Nullable Locale locale) {
        // A null locale has no language, variant or script to fall back on. Without this
        // guard the fallback chain below would dereference it and throw.
        if (locale == null) {
            return sEmptyHyphenator;
        }

        synchronized (sLock) {
            Hyphenator result = sMap.get(locale);
            if (result != null) {
                return result;
            }

            // If there's a variant, fall back to language+variant only, if available
            final String variant = locale.getVariant();
            if (!variant.isEmpty()) {
                final Locale languageAndVariantOnlyLocale =
                        new Locale(locale.getLanguage(), "", variant);
                result = sMap.get(languageAndVariantOnlyLocale);
                if (result != null) {
                    sMap.put(locale, result);
                    return result;
                }
            }

            // Fall back to language-only, if available
            final Locale languageOnlyLocale = new Locale(locale.getLanguage());
            result = sMap.get(languageOnlyLocale);
            if (result != null) {
                sMap.put(locale, result);
                return result;
            }

            // Fall back to script-only, if available
            final String script = locale.getScript();
            if (!script.isEmpty()) {
                final Locale scriptOnlyLocale = new Locale.Builder()
                        .setLanguage("und")
                        .setScript(script)
                        .build();
                result = sMap.get(scriptOnlyLocale);
                if (result != null) {
                    sMap.put(locale, result);
                    return result;
                }
            }

            sMap.put(locale, sEmptyHyphenator); // To remember we found nothing.
        }
        return sEmptyHyphenator;
    }

    /**
     * Memory-maps the pattern file for {@code languageTag} and wraps it in a Hyphenator.
     *
     * <p>The file name is {@code hyph-<lowercased tag>.hyb} inside the directory returned by
     * {@link #getSystemHyphenatorLocation()}.
     *
     * @param languageTag language tag identifying the pattern file, e.g. {@code "en-US"}
     * @return the loaded hyphenator, or {@code null} if the file could not be opened or mapped
     *         (the failure is logged)
     */
    private static Hyphenator loadHyphenator(String languageTag) {
        String patternFilename = "hyph-" + languageTag.toLowerCase(Locale.US) + ".hyb";
        File patternFile = new File(getSystemHyphenatorLocation(), patternFilename);
        // The file is closed as soon as the mapping exists; the returned Hyphenator keeps a
        // reference to the mapped buffer itself.
        try (RandomAccessFile f = new RandomAccessFile(patternFile, "r")) {
            FileChannel fc = f.getChannel();
            MappedByteBuffer buf = fc.map(FileChannel.MapMode.READ_ONLY, 0, fc.size());
            long nativePtr = StaticLayout.nLoadHyphenator(buf, 0);
            return new Hyphenator(nativePtr, buf);
        } catch (IOException e) {
            Log.e(TAG, "error loading hyphenation " + patternFile, e);
            return null;
        }
    }

    /**
     * Returns the directory that holds the system hyphenation pattern files.
     */
    private static File getSystemHyphenatorLocation() {
        return new File("/system/usr/hyphen-data");
    }

    // This array holds pairs of language tags that are used to prefill the map from locale to
    // hyphenation data: The hyphenation data for the first field will be prefilled from the
    // hyphenation data for the second field.
    //
    // The aliases that are computable by the get() method above are not included.
    private static final String[][] LOCALE_FALLBACK_DATA = {
        // English locales that fall back to en-US. The data is
        // from CLDR. It's all English locales, minus the locales whose
        // parent is en-001 (from supplementalData.xml, under <parentLocales>).
        // TODO: Figure out how to get this from ICU.
        {"en-AS", "en-US"}, // English (American Samoa)
        {"en-GU", "en-US"}, // English (Guam)
        {"en-MH", "en-US"}, // English (Marshall Islands)
        {"en-MP", "en-US"}, // English (Northern Mariana Islands)
        {"en-PR", "en-US"}, // English (Puerto Rico)
        {"en-UM", "en-US"}, // English (United States Minor Outlying Islands)
        {"en-VI", "en-US"}, // English (Virgin Islands)

        // All English locales other than those falling back to en-US are mapped to en-GB.
        {"en", "en-GB"},

        // For German, we're assuming the 1996 (and later) orthography by default.
        {"de", "de-1996"},
        // Liechtenstein uses the Swiss hyphenation rules for the 1901 orthography.
        {"de-LI-1901", "de-CH-1901"},

        // Norwegian is very probably Norwegian Bokmål.
        {"no", "nb"},

        // Use mn-Cyrl. According to CLDR's likelySubtags.xml, mn is most likely to be mn-Cyrl.
        {"mn", "mn-Cyrl"}, // Mongolian

        // Fall back to Ethiopic script for languages likely to be written in Ethiopic.
        // Data is from CLDR's likelySubtags.xml.
        // TODO: Convert this to a mechanism using ICU4J's ULocale#addLikelySubtags().
        {"am", "und-Ethi"}, // Amharic
        {"byn", "und-Ethi"}, // Blin
        {"gez", "und-Ethi"}, // Geʻez
        {"ti", "und-Ethi"}, // Tigrinya
        {"wal", "und-Ethi"}, // Wolaytta
    };

    /**
     * Load hyphenation patterns at initialization time. We want to have patterns
     * for all locales loaded and ready to use so we don't have to do any file IO
     * on the UI thread when drawing text in different locales.
     *
     * <p>Languages whose pattern file fails to load are skipped. The aliases listed in
     * {@code LOCALE_FALLBACK_DATA} are registered afterwards. The whole operation runs while
     * holding the same lock that {@link #get(Locale)} uses.
     *
     * @hide
     */
    public static void init() {
        // TODO: replace this with a discovery-based method that looks into /system/usr/hyphen-data
        final String[] availableLanguages = {
            "as",
            "bn",
            "cy",
            "da",
            "de-1901", "de-1996", "de-CH-1901",
            "en-GB", "en-US",
            "es",
            "et",
            "eu",
            "fr",
            "ga",
            "gu",
            "hi",
            "hr",
            "hu",
            "hy",
            "kn",
            "ml",
            "mn-Cyrl",
            "mr",
            "nb",
            "nn",
            "or",
            "pa",
            "pt",
            "sl",
            "ta",
            "te",
            "tk",
            "und-Ethi",
        };

        // sMap is @GuardedBy("sLock"), so every write to it must happen under the lock.
        synchronized (sLock) {
            // Placeholder entry for the null locale, kept so the map contents stay as they
            // always were. get() handles a null locale itself before consulting the map.
            sMap.put(null, null);

            for (String languageTag : availableLanguages) {
                Hyphenator h = loadHyphenator(languageTag);
                if (h != null) {
                    sMap.put(Locale.forLanguageTag(languageTag), h);
                }
            }

            for (String[] pair : LOCALE_FALLBACK_DATA) {
                String language = pair[0];
                String fallback = pair[1];
                // If the fallback's patterns failed to load this stores null, which get()
                // treats the same as a missing entry.
                sMap.put(Locale.forLanguageTag(language),
                        sMap.get(Locale.forLanguageTag(fallback)));
            }
        }
    }
}