Hyphenator.java revision c3f2f7b93b3fd8b2eaff4942f323f60aa4548493
1/*
2 * Copyright (C) 2015 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package android.text;
18
19import com.android.internal.annotations.GuardedBy;
20
21import android.annotation.Nullable;
22import android.util.Log;
23
24import libcore.io.IoUtils;
25
26import java.io.File;
27import java.io.IOException;
28import java.util.HashMap;
29import java.util.Locale;
30
31/**
32 * Hyphenator is a wrapper class for a native implementation of automatic hyphenation,
33 * in essence finding valid hyphenation opportunities in a word.
34 *
35 * @hide
36 */
37public class Hyphenator {
38    // This class has deliberately simple lifetime management (no finalizer) because in
39    // the common case a process will use a very small number of locales.
40
41    private static String TAG = "Hyphenator";
42
43    private final static Object sLock = new Object();
44
45    @GuardedBy("sLock")
46    final static HashMap<Locale, Hyphenator> sMap = new HashMap<Locale, Hyphenator>();
47
48    final private long mNativePtr;
49
50    private Hyphenator(long nativePtr) {
51        mNativePtr = nativePtr;
52    }
53
54    public static long get(@Nullable Locale locale) {
55        synchronized (sLock) {
56            if (sMap.containsKey(locale)) {
57                Hyphenator result = sMap.get(locale);
58                return (result == null) ? 0 : result.mNativePtr;
59            }
60
61            // TODO: Convert this a proper locale-fallback system
62
63            // Fall back to language-only, if available
64            Locale languageOnlyLocale = new Locale(locale.getLanguage());
65            if (sMap.containsKey(languageOnlyLocale)) {
66                Hyphenator result = sMap.get(languageOnlyLocale);
67                sMap.put(locale, result);
68                return (result == null) ? 0 : result.mNativePtr;
69            }
70
71            // Fall back to script-only, if available
72            String script = locale.getScript();
73            if (!script.equals("")) {
74                Locale scriptOnlyLocale = new Locale.Builder()
75                        .setLanguage("und")
76                        .setScript(script)
77                        .build();
78                if (sMap.containsKey(scriptOnlyLocale)) {
79                    Hyphenator result = sMap.get(scriptOnlyLocale);
80                    sMap.put(locale, result);
81                    return (result == null) ? 0 : result.mNativePtr;
82                }
83            }
84
85            sMap.put(locale, null); // To remember we found nothing.
86        }
87        return 0;
88    }
89
90    private static Hyphenator loadHyphenator(String languageTag) {
91        String patternFilename = "hyph-"+languageTag.toLowerCase(Locale.US)+".pat.txt";
92        File patternFile = new File(getSystemHyphenatorLocation(), patternFilename);
93        try {
94            String patternData = IoUtils.readFileAsString(patternFile.getAbsolutePath());
95            long nativePtr = StaticLayout.nLoadHyphenator(patternData);
96            return new Hyphenator(nativePtr);
97        } catch (IOException e) {
98            Log.e(TAG, "error loading hyphenation " + patternFile, e);
99            return null;
100        }
101    }
102
103    private static File getSystemHyphenatorLocation() {
104        return new File("/system/usr/hyphen-data");
105    }
106
107    // This array holds pairs of language tags that are used to prefill the map from locale to
108    // hyphenation data: The hyphenation data for the first field will be prefilled from the
109    // hyphenation data for the second field.
110    //
111    // The aliases that are computable by the get() method above are not included.
112    private static final String[][] LOCALE_FALLBACK_DATA = {
113        // English locales that fall back to en-US. The data is
114        // from CLDR. It's all English locales, minus the locales whose
115        // parent is en-001 (from supplementalData.xml, under <parentLocales>).
116        // TODO: Figure out how to get this from ICU.
117        {"en-AS", "en-US"}, // English (American Samoa)
118        {"en-GU", "en-US"}, // English (Guam)
119        {"en-MH", "en-US"}, // English (Marshall Islands)
120        {"en-MP", "en-US"}, // English (Northern Mariana Islands)
121        {"en-PR", "en-US"}, // English (Puerto Rico)
122        {"en-UM", "en-US"}, // English (United States Minor Outlying Islands)
123        {"en-VI", "en-US"}, // English (Virgin Islands)
124
125        // Norwegian is very probably Norwegian Bokmål.
126        {"no", "nb"},
127
128        // Fall back to Ethiopic script for languages likely to be written in Ethiopic.
129        // Data is from CLDR's likelySubtags.xml.
130        // TODO: Convert this to a mechanism using ICU4J's ULocale#addLikelySubtags().
131        {"am", "und-Ethi"}, // Amharic
132        {"byn", "und-Ethi"}, // Blin
133        {"gez", "und-Ethi"}, // Geʻez
134        {"ti", "und-Ethi"}, // Tigrinya
135        {"wal", "und-Ethi"}, // Wolaytta
136    };
137
138    /**
139     * Load hyphenation patterns at initialization time. We want to have patterns
140     * for all locales loaded and ready to use so we don't have to do any file IO
141     * on the UI thread when drawing text in different locales.
142     *
143     * @hide
144     */
145    public static void init() {
146        sMap.put(null, null);
147
148        // TODO: replace this with a discovery-based method that looks into /system/usr/hyphen-data
149        String[] availableLanguages = {"en-US", "eu", "hu", "hy", "nb", "nn", "sa", "und-Ethi"};
150        for (int i = 0; i < availableLanguages.length; i++) {
151            String languageTag = availableLanguages[i];
152            Hyphenator h = loadHyphenator(languageTag);
153            if (h != null) {
154                sMap.put(Locale.forLanguageTag(languageTag), h);
155            }
156        }
157
158        for (int i = 0; i < LOCALE_FALLBACK_DATA.length; i++) {
159            String language = LOCALE_FALLBACK_DATA[i][0];
160            String fallback = LOCALE_FALLBACK_DATA[i][1];
161            sMap.put(Locale.forLanguageTag(language), sMap.get(Locale.forLanguageTag(fallback)));
162        }
163    }
164}
165