Hyphenator.java revision c3f2f7b93b3fd8b2eaff4942f323f60aa4548493
1/* 2 * Copyright (C) 2015 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17package android.text; 18 19import com.android.internal.annotations.GuardedBy; 20 21import android.annotation.Nullable; 22import android.util.Log; 23 24import libcore.io.IoUtils; 25 26import java.io.File; 27import java.io.IOException; 28import java.util.HashMap; 29import java.util.Locale; 30 31/** 32 * Hyphenator is a wrapper class for a native implementation of automatic hyphenation, 33 * in essence finding valid hyphenation opportunities in a word. 34 * 35 * @hide 36 */ 37public class Hyphenator { 38 // This class has deliberately simple lifetime management (no finalizer) because in 39 // the common case a process will use a very small number of locales. 40 41 private static String TAG = "Hyphenator"; 42 43 private final static Object sLock = new Object(); 44 45 @GuardedBy("sLock") 46 final static HashMap<Locale, Hyphenator> sMap = new HashMap<Locale, Hyphenator>(); 47 48 final private long mNativePtr; 49 50 private Hyphenator(long nativePtr) { 51 mNativePtr = nativePtr; 52 } 53 54 public static long get(@Nullable Locale locale) { 55 synchronized (sLock) { 56 if (sMap.containsKey(locale)) { 57 Hyphenator result = sMap.get(locale); 58 return (result == null) ? 0 : result.mNativePtr; 59 } 60 61 // TODO: Convert this a proper locale-fallback system 62 63 // Fall back to language-only, if available 64 Locale languageOnlyLocale = new Locale(locale.getLanguage()); 65 if (sMap.containsKey(languageOnlyLocale)) { 66 Hyphenator result = sMap.get(languageOnlyLocale); 67 sMap.put(locale, result); 68 return (result == null) ? 0 : result.mNativePtr; 69 } 70 71 // Fall back to script-only, if available 72 String script = locale.getScript(); 73 if (!script.equals("")) { 74 Locale scriptOnlyLocale = new Locale.Builder() 75 .setLanguage("und") 76 .setScript(script) 77 .build(); 78 if (sMap.containsKey(scriptOnlyLocale)) { 79 Hyphenator result = sMap.get(scriptOnlyLocale); 80 sMap.put(locale, result); 81 return (result == null) ? 0 : result.mNativePtr; 82 } 83 } 84 85 sMap.put(locale, null); // To remember we found nothing. 86 } 87 return 0; 88 } 89 90 private static Hyphenator loadHyphenator(String languageTag) { 91 String patternFilename = "hyph-"+languageTag.toLowerCase(Locale.US)+".pat.txt"; 92 File patternFile = new File(getSystemHyphenatorLocation(), patternFilename); 93 try { 94 String patternData = IoUtils.readFileAsString(patternFile.getAbsolutePath()); 95 long nativePtr = StaticLayout.nLoadHyphenator(patternData); 96 return new Hyphenator(nativePtr); 97 } catch (IOException e) { 98 Log.e(TAG, "error loading hyphenation " + patternFile, e); 99 return null; 100 } 101 } 102 103 private static File getSystemHyphenatorLocation() { 104 return new File("/system/usr/hyphen-data"); 105 } 106 107 // This array holds pairs of language tags that are used to prefill the map from locale to 108 // hyphenation data: The hyphenation data for the first field will be prefilled from the 109 // hyphenation data for the second field. 110 // 111 // The aliases that are computable by the get() method above are not included. 112 private static final String[][] LOCALE_FALLBACK_DATA = { 113 // English locales that fall back to en-US. The data is 114 // from CLDR. It's all English locales, minus the locales whose 115 // parent is en-001 (from supplementalData.xml, under <parentLocales>). 116 // TODO: Figure out how to get this from ICU. 117 {"en-AS", "en-US"}, // English (American Samoa) 118 {"en-GU", "en-US"}, // English (Guam) 119 {"en-MH", "en-US"}, // English (Marshall Islands) 120 {"en-MP", "en-US"}, // English (Northern Mariana Islands) 121 {"en-PR", "en-US"}, // English (Puerto Rico) 122 {"en-UM", "en-US"}, // English (United States Minor Outlying Islands) 123 {"en-VI", "en-US"}, // English (Virgin Islands) 124 125 // Norwegian is very probably Norwegian Bokmål. 126 {"no", "nb"}, 127 128 // Fall back to Ethiopic script for languages likely to be written in Ethiopic. 129 // Data is from CLDR's likelySubtags.xml. 130 // TODO: Convert this to a mechanism using ICU4J's ULocale#addLikelySubtags(). 131 {"am", "und-Ethi"}, // Amharic 132 {"byn", "und-Ethi"}, // Blin 133 {"gez", "und-Ethi"}, // Geʻez 134 {"ti", "und-Ethi"}, // Tigrinya 135 {"wal", "und-Ethi"}, // Wolaytta 136 }; 137 138 /** 139 * Load hyphenation patterns at initialization time. We want to have patterns 140 * for all locales loaded and ready to use so we don't have to do any file IO 141 * on the UI thread when drawing text in different locales. 142 * 143 * @hide 144 */ 145 public static void init() { 146 sMap.put(null, null); 147 148 // TODO: replace this with a discovery-based method that looks into /system/usr/hyphen-data 149 String[] availableLanguages = {"en-US", "eu", "hu", "hy", "nb", "nn", "sa", "und-Ethi"}; 150 for (int i = 0; i < availableLanguages.length; i++) { 151 String languageTag = availableLanguages[i]; 152 Hyphenator h = loadHyphenator(languageTag); 153 if (h != null) { 154 sMap.put(Locale.forLanguageTag(languageTag), h); 155 } 156 } 157 158 for (int i = 0; i < LOCALE_FALLBACK_DATA.length; i++) { 159 String language = LOCALE_FALLBACK_DATA[i][0]; 160 String fallback = LOCALE_FALLBACK_DATA[i][1]; 161 sMap.put(Locale.forLanguageTag(language), sMap.get(Locale.forLanguageTag(fallback))); 162 } 163 } 164} 165