1/*
2 * Copyright (C) 2015 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package android.text;
18
19import android.annotation.Nullable;
20import android.util.Log;
21
22import com.android.internal.annotations.GuardedBy;
23
24import java.io.File;
25import java.io.IOException;
26import java.io.RandomAccessFile;
27import java.nio.ByteBuffer;
28import java.nio.MappedByteBuffer;
29import java.nio.channels.FileChannel;
30import java.util.HashMap;
31import java.util.Locale;
32
33/**
34 * Hyphenator is a wrapper class for a native implementation of automatic hyphenation,
35 * in essence finding valid hyphenation opportunities in a word.
36 *
37 * @hide
38 */
39public class Hyphenator {
40    // This class has deliberately simple lifetime management (no finalizer) because in
41    // the common case a process will use a very small number of locales.
42
43    private static String TAG = "Hyphenator";
44
45    private final static Object sLock = new Object();
46
47    @GuardedBy("sLock")
48    final static HashMap<Locale, Hyphenator> sMap = new HashMap<Locale, Hyphenator>();
49
50    final static Hyphenator sEmptyHyphenator =
51            new Hyphenator(StaticLayout.nLoadHyphenator(null, 0), null);
52
53    final private long mNativePtr;
54
55    // We retain a reference to the buffer to keep the memory mapping valid
56    @SuppressWarnings("unused")
57    final private ByteBuffer mBuffer;
58
59    private Hyphenator(long nativePtr, ByteBuffer b) {
60        mNativePtr = nativePtr;
61        mBuffer = b;
62    }
63
64    public long getNativePtr() {
65        return mNativePtr;
66    }
67
68    public static Hyphenator get(@Nullable Locale locale) {
69        synchronized (sLock) {
70            Hyphenator result = sMap.get(locale);
71            if (result != null) {
72                return result;
73            }
74
75            // If there's a variant, fall back to language+variant only, if available
76            final String variant = locale.getVariant();
77            if (!variant.isEmpty()) {
78                final Locale languageAndVariantOnlyLocale =
79                        new Locale(locale.getLanguage(), "", variant);
80                result = sMap.get(languageAndVariantOnlyLocale);
81                if (result != null) {
82                    sMap.put(locale, result);
83                    return result;
84                }
85            }
86
87            // Fall back to language-only, if available
88            final Locale languageOnlyLocale = new Locale(locale.getLanguage());
89            result = sMap.get(languageOnlyLocale);
90            if (result != null) {
91                sMap.put(locale, result);
92                return result;
93            }
94
95            // Fall back to script-only, if available
96            final String script = locale.getScript();
97            if (!script.equals("")) {
98                final Locale scriptOnlyLocale = new Locale.Builder()
99                        .setLanguage("und")
100                        .setScript(script)
101                        .build();
102                result = sMap.get(scriptOnlyLocale);
103                if (result != null) {
104                    sMap.put(locale, result);
105                    return result;
106                }
107            }
108
109            sMap.put(locale, sEmptyHyphenator);  // To remember we found nothing.
110        }
111        return sEmptyHyphenator;
112    }
113
114    private static Hyphenator loadHyphenator(String languageTag) {
115        String patternFilename = "hyph-" + languageTag.toLowerCase(Locale.US) + ".hyb";
116        File patternFile = new File(getSystemHyphenatorLocation(), patternFilename);
117        try {
118            RandomAccessFile f = new RandomAccessFile(patternFile, "r");
119            try {
120                FileChannel fc = f.getChannel();
121                MappedByteBuffer buf = fc.map(FileChannel.MapMode.READ_ONLY, 0, fc.size());
122                long nativePtr = StaticLayout.nLoadHyphenator(buf, 0);
123                return new Hyphenator(nativePtr, buf);
124            } finally {
125                f.close();
126            }
127        } catch (IOException e) {
128            Log.e(TAG, "error loading hyphenation " + patternFile, e);
129            return null;
130        }
131    }
132
133    private static File getSystemHyphenatorLocation() {
134        return new File("/system/usr/hyphen-data");
135    }
136
137    // This array holds pairs of language tags that are used to prefill the map from locale to
138    // hyphenation data: The hyphenation data for the first field will be prefilled from the
139    // hyphenation data for the second field.
140    //
141    // The aliases that are computable by the get() method above are not included.
142    private static final String[][] LOCALE_FALLBACK_DATA = {
143        // English locales that fall back to en-US. The data is
144        // from CLDR. It's all English locales, minus the locales whose
145        // parent is en-001 (from supplementalData.xml, under <parentLocales>).
146        // TODO: Figure out how to get this from ICU.
147        {"en-AS", "en-US"}, // English (American Samoa)
148        {"en-GU", "en-US"}, // English (Guam)
149        {"en-MH", "en-US"}, // English (Marshall Islands)
150        {"en-MP", "en-US"}, // English (Northern Mariana Islands)
151        {"en-PR", "en-US"}, // English (Puerto Rico)
152        {"en-UM", "en-US"}, // English (United States Minor Outlying Islands)
153        {"en-VI", "en-US"}, // English (Virgin Islands)
154
155        // All English locales other than those falling back to en-US are mapped to en-GB.
156        {"en", "en-GB"},
157
158        // For German, we're assuming the 1996 (and later) orthography by default.
159        {"de", "de-1996"},
160        // Liechtenstein uses the Swiss hyphenation rules for the 1901 orthography.
161        {"de-LI-1901", "de-CH-1901"},
162
163        // Norwegian is very probably Norwegian Bokmål.
164        {"no", "nb"},
165
166        // Use mn-Cyrl. According to CLDR's likelySubtags.xml, mn is most likely to be mn-Cyrl.
167        {"mn", "mn-Cyrl"}, // Mongolian
168
169        // Fall back to Ethiopic script for languages likely to be written in Ethiopic.
170        // Data is from CLDR's likelySubtags.xml.
171        // TODO: Convert this to a mechanism using ICU4J's ULocale#addLikelySubtags().
172        {"am", "und-Ethi"}, // Amharic
173        {"byn", "und-Ethi"}, // Blin
174        {"gez", "und-Ethi"}, // Geʻez
175        {"ti", "und-Ethi"}, // Tigrinya
176        {"wal", "und-Ethi"}, // Wolaytta
177    };
178
179    /**
180     * Load hyphenation patterns at initialization time. We want to have patterns
181     * for all locales loaded and ready to use so we don't have to do any file IO
182     * on the UI thread when drawing text in different locales.
183     *
184     * @hide
185     */
186    public static void init() {
187        sMap.put(null, null);
188
189        // TODO: replace this with a discovery-based method that looks into /system/usr/hyphen-data
190        String[] availableLanguages = {
191            "as",
192            "bn",
193            "cy",
194            "da",
195            "de-1901", "de-1996", "de-CH-1901",
196            "en-GB", "en-US",
197            "es",
198            "et",
199            "eu",
200            "fr",
201            "ga",
202            "gu",
203            "hi",
204            "hr",
205            "hu",
206            "hy",
207            "kn",
208            "ml",
209            "mn-Cyrl",
210            "mr",
211            "nb",
212            "nn",
213            "or",
214            "pa",
215            "pt",
216            "sl",
217            "ta",
218            "te",
219            "tk",
220            "und-Ethi",
221        };
222        for (int i = 0; i < availableLanguages.length; i++) {
223            String languageTag = availableLanguages[i];
224            Hyphenator h = loadHyphenator(languageTag);
225            if (h != null) {
226                sMap.put(Locale.forLanguageTag(languageTag), h);
227            }
228        }
229
230        for (int i = 0; i < LOCALE_FALLBACK_DATA.length; i++) {
231            String language = LOCALE_FALLBACK_DATA[i][0];
232            String fallback = LOCALE_FALLBACK_DATA[i][1];
233            sMap.put(Locale.forLanguageTag(language), sMap.get(Locale.forLanguageTag(fallback)));
234        }
235    }
236}
237