1/*
2 * Copyright (c) 2006, 2007, 2008, 2009, 2010, 2012 Google Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are
6 * met:
7 *
8 *     * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 *     * Redistributions in binary form must reproduce the above
11 * copyright notice, this list of conditions and the following disclaimer
12 * in the documentation and/or other materials provided with the
13 * distribution.
14 *     * Neither the name of Google Inc. nor the names of its
15 * contributors may be used to endorse or promote products derived from
16 * this software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31#include "config.h"
32#include "platform/fonts/win/FontFallbackWin.h"
33
34#include "platform/win/HWndDC.h"
35#include "wtf/HashMap.h"
36#include "wtf/text/StringHash.h"
37#include "wtf/text/WTFString.h"
38#include <limits>
39#include <unicode/locid.h>
40#include <unicode/uchar.h>
41
42namespace WebCore {
43
44namespace {
45
46bool isFontPresent(const UChar* fontName)
47{
48    HFONT hfont = CreateFont(12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, fontName);
49    if (!hfont)
50        return false;
51    HWndDC dc(0);
52    HGDIOBJ oldFont = static_cast<HFONT>(SelectObject(dc, hfont));
53    WCHAR actualFontName[LF_FACESIZE];
54    GetTextFace(dc, LF_FACESIZE, actualFontName);
55    actualFontName[LF_FACESIZE - 1] = 0;
56    SelectObject(dc, oldFont);
57    DeleteObject(hfont);
58    // We don't have to worry about East Asian fonts with locale-dependent
59    // names here for now.
60    // FIXME: Why not?
61    return !wcscmp(fontName, actualFontName);
62}
63
// A simple mapping from UScriptCode to font family name, indexed directly by
// the UScriptCode value. The array is sparse (most scripts have no entry and
// hold a null pointer), which works well since the range of UScriptCode
// values is small.
typedef const UChar* ScriptToFontMap[USCRIPT_CODE_LIMIT];
67
68void initializeScriptFontMap(ScriptToFontMap& scriptFontMap)
69{
70    struct FontMap {
71        UScriptCode script;
72        const UChar* family;
73    };
74
75    static const FontMap fontMap[] = {
76        {USCRIPT_LATIN, L"times new roman"},
77        {USCRIPT_GREEK, L"times new roman"},
78        {USCRIPT_CYRILLIC, L"times new roman"},
79        // FIXME: Consider trying new Vista fonts before XP fonts for CJK.
80        // Some Vista users do want to use Vista cleartype CJK fonts. If we
81        // did, the results of tests with CJK characters would have to be
82        // regenerated for Vista.
83        {USCRIPT_SIMPLIFIED_HAN, L"simsun"},
84        {USCRIPT_TRADITIONAL_HAN, L"pmingliu"},
85        {USCRIPT_HIRAGANA, L"ms pgothic"},
86        {USCRIPT_KATAKANA, L"ms pgothic"},
87        {USCRIPT_KATAKANA_OR_HIRAGANA, L"ms pgothic"},
88        {USCRIPT_HANGUL, L"gulim"},
89        {USCRIPT_THAI, L"tahoma"},
90        {USCRIPT_HEBREW, L"david"},
91        {USCRIPT_ARABIC, L"tahoma"},
92        {USCRIPT_DEVANAGARI, L"mangal"},
93        {USCRIPT_BENGALI, L"vrinda"},
94        {USCRIPT_GURMUKHI, L"raavi"},
95        {USCRIPT_GUJARATI, L"shruti"},
96        {USCRIPT_TAMIL, L"latha"},
97        {USCRIPT_TELUGU, L"gautami"},
98        {USCRIPT_KANNADA, L"tunga"},
99        {USCRIPT_GEORGIAN, L"sylfaen"},
100        {USCRIPT_ARMENIAN, L"sylfaen"},
101        {USCRIPT_THAANA, L"mv boli"},
102        {USCRIPT_CANADIAN_ABORIGINAL, L"euphemia"},
103        {USCRIPT_CHEROKEE, L"plantagenet cherokee"},
104        {USCRIPT_MONGOLIAN, L"mongolian balti"},
105        // For USCRIPT_COMMON, we map blocks to scripts when
106        // that makes sense.
107    };
108
109    struct ScriptToFontFamilies {
110        UScriptCode script;
111        const UChar** families;
112    };
113
114    // Kartika on Vista or earlier lacks the support for Chillu
115    // letters added to Unicode 5.1.
116    // Try AnjaliOldLipi (a very widely used Malaylalam font with the full
117    // Unicode 5.x support) before falling back to Kartika.
118    static const UChar* malayalamFonts[] = {L"AnjaliOldLipi", L"Lohit Malayalam", L"Kartika", L"Rachana", 0};
119    // Try Khmer OS before Vista fonts because 'Khmer OS' goes along better
120    // with Latin and looks better/larger for the same size.
121    static const UChar* khmerFonts[] = {L"Khmer OS", L"MoolBoran", L"DaunPenh", L"Code2000", 0};
122    // For the following 6 scripts, two or fonts are listed. The fonts in
123    // the 1st slot are not available on Windows XP. To support these
124    // scripts on XP, listed in the rest of slots are widely used
125    // fonts.
126    static const UChar* ethiopicFonts[] = {L"Nyala", L"Abyssinica SIL", L"Ethiopia Jiret", L"Visual Geez Unicode", L"GF Zemen Unicode", 0};
127    static const UChar* oriyaFonts[] = {L"Kalinga", L"ori1Uni", L"Lohit Oriya", 0};
128    static const UChar* laoFonts[] = {L"DokChampa", L"Saysettha OT", L"Phetsarath OT", L"Code2000", 0};
129    static const UChar* tibetanFonts[] = {L"Microsoft Himalaya", L"Jomolhari", L"Tibetan Machine Uni", 0};
130    static const UChar* sinhalaFonts[] = {L"Iskoola Pota", L"AksharUnicode", 0};
131    static const UChar* yiFonts[] = {L"Microsoft Yi Balti", L"Nuosu SIL", L"Code2000", 0};
132    // http://www.bethmardutho.org/support/meltho/download/index.php
133    static const UChar* syriacFonts[] = {L"Estrangelo Edessa", L"Estrangelo Nisibin", L"Code2000", 0};
134    // No Myanmar/Burmese font is shipped with Windows, yet. Try a few
135    // widely available/used ones that supports Unicode 5.1 or later.
136    static const UChar* myanmarFonts[] = {L"Padauk", L"Parabaik", L"Myanmar3", L"Code2000", 0};
137
138    static const ScriptToFontFamilies scriptToFontFamilies[] = {
139        {USCRIPT_MALAYALAM, malayalamFonts},
140        {USCRIPT_KHMER, khmerFonts},
141        {USCRIPT_ETHIOPIC, ethiopicFonts},
142        {USCRIPT_ORIYA, oriyaFonts},
143        {USCRIPT_LAO, laoFonts},
144        {USCRIPT_TIBETAN, tibetanFonts},
145        {USCRIPT_SINHALA, sinhalaFonts},
146        {USCRIPT_YI, yiFonts},
147        {USCRIPT_SYRIAC, syriacFonts},
148        {USCRIPT_MYANMAR, myanmarFonts},
149    };
150
151    for (size_t i = 0; i < WTF_ARRAY_LENGTH(fontMap); ++i)
152        scriptFontMap[fontMap[i].script] = fontMap[i].family;
153
154    // FIXME: Instead of scanning the hard-coded list, we have to
155    // use EnumFont* to 'inspect' fonts to pick up fonts covering scripts
156    // when it's possible (e.g. using OS/2 table). If we do that, this
157    // had better be pulled out of here.
158    for (size_t i = 0; i < WTF_ARRAY_LENGTH(scriptToFontFamilies); ++i) {
159        UScriptCode script = scriptToFontFamilies[i].script;
160        scriptFontMap[script] = 0;
161        const UChar** familyPtr = scriptToFontFamilies[i].families;
162        while (*familyPtr) {
163            if (isFontPresent(*familyPtr)) {
164                scriptFontMap[script] = *familyPtr;
165                break;
166            }
167            ++familyPtr;
168        }
169    }
170
171    // Initialize the locale-dependent mapping.
172    // Since Chrome synchronizes the ICU default locale with its UI locale,
173    // this ICU locale tells the current UI locale of Chrome.
174    icu::Locale locale = icu::Locale::getDefault();
175    const UChar* localeFamily = 0;
176    if (locale == icu::Locale::getJapanese()) {
177        localeFamily = scriptFontMap[USCRIPT_HIRAGANA];
178    } else if (locale == icu::Locale::getKorean()) {
179        localeFamily = scriptFontMap[USCRIPT_HANGUL];
180    } else if (locale == icu::Locale::getTraditionalChinese()) {
181        localeFamily = scriptFontMap[USCRIPT_TRADITIONAL_HAN];
182    } else {
183        // For other locales, use the simplified Chinese font for Han.
184        localeFamily = scriptFontMap[USCRIPT_SIMPLIFIED_HAN];
185    }
186    if (localeFamily)
187        scriptFontMap[USCRIPT_HAN] = localeFamily;
188}
189
190// There are a lot of characters in USCRIPT_COMMON that can be covered
191// by fonts for scripts closely related to them. See
192// http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[:Script=Common:]
193// FIXME: make this more efficient with a wider coverage
194UScriptCode getScriptBasedOnUnicodeBlock(int ucs4)
195{
196    UBlockCode block = ublock_getCode(ucs4);
197    switch (block) {
198    case UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION:
199        return USCRIPT_HAN;
200    case UBLOCK_HIRAGANA:
201    case UBLOCK_KATAKANA:
202        return USCRIPT_HIRAGANA;
203    case UBLOCK_ARABIC:
204        return USCRIPT_ARABIC;
205    case UBLOCK_THAI:
206        return USCRIPT_THAI;
207    case UBLOCK_GREEK:
208        return USCRIPT_GREEK;
209    case UBLOCK_DEVANAGARI:
210        // For Danda and Double Danda (U+0964, U+0965), use a Devanagari
211        // font for now although they're used by other scripts as well.
212        // Without a context, we can't do any better.
213        return USCRIPT_DEVANAGARI;
214    case UBLOCK_ARMENIAN:
215        return USCRIPT_ARMENIAN;
216    case UBLOCK_GEORGIAN:
217        return USCRIPT_GEORGIAN;
218    case UBLOCK_KANNADA:
219        return USCRIPT_KANNADA;
220    default:
221        return USCRIPT_COMMON;
222    }
223}
224
225UScriptCode getScript(int ucs4)
226{
227    UErrorCode err = U_ZERO_ERROR;
228    UScriptCode script = uscript_getScript(ucs4, &err);
229    // If script is invalid, common or inherited or there's an error,
230    // infer a script based on the unicode block of a character.
231    if (script <= USCRIPT_INHERITED || U_FAILURE(err))
232        script = getScriptBasedOnUnicodeBlock(ucs4);
233    return script;
234}
235
236} // namespace
237
238// FIXME: this is font fallback code version 0.1
239//  - Cover all the scripts
240//  - Get the default font for each script/generic family from the
241//    preference instead of hardcoding in the source.
242//    (at least, read values from the registry for IE font settings).
243//  - Support generic families (from FontDescription)
244//  - If the default font for a script is not available,
245//    try some more fonts known to support it. Finally, we can
246//    use EnumFontFamilies or similar APIs to come up with a list of
247//    fonts supporting the script and cache the result.
248//  - Consider using UnicodeSet (or UnicodeMap) converted from
249//    GLYPHSET (BMP) or directly read from truetype cmap tables to
250//    keep track of which character is supported by which font
251//  - Update script_font_cache in response to WM_FONTCHANGE
252
253const UChar* getFontFamilyForScript(UScriptCode script,
254    FontDescription::GenericFamilyType generic)
255{
256    static ScriptToFontMap scriptFontMap;
257    static bool initialized = false;
258    if (!initialized) {
259        initializeScriptFontMap(scriptFontMap);
260        initialized = true;
261    }
262    if (script == USCRIPT_INVALID_CODE)
263        return 0;
264    ASSERT(script < USCRIPT_CODE_LIMIT);
265    return scriptFontMap[script];
266}
267
268// FIXME:
269//  - Handle 'Inherited', 'Common' and 'Unknown'
270//    (see http://www.unicode.org/reports/tr24/#Usage_Model )
271//    For 'Inherited' and 'Common', perhaps we need to
272//    accept another parameter indicating the previous family
273//    and just return it.
274//  - All the characters (or characters up to the point a single
275//    font can cover) need to be taken into account
276const UChar* getFallbackFamily(UChar32 character,
277    FontDescription::GenericFamilyType generic,
278    UScriptCode* scriptChecked)
279{
280    ASSERT(character);
281    UScriptCode script = getScript(character);
282
283    // For the full-width ASCII characters (U+FF00 - U+FF5E), use the font for
284    // Han (determined in a locale-dependent way above). Full-width ASCII
285    // characters are rather widely used in Japanese and Chinese documents and
286    // they're fully covered by Chinese, Japanese and Korean fonts.
287    if (0xFF00 < character && character < 0xFF5F)
288        script = USCRIPT_HAN;
289
290    if (script == USCRIPT_COMMON)
291        script = getScriptBasedOnUnicodeBlock(character);
292
293    const UChar* family = getFontFamilyForScript(script, generic);
294    // Another lame work-around to cover non-BMP characters.
295    // If the font family for script is not found or the character is
296    // not in BMP (> U+FFFF), we resort to the hard-coded list of
297    // fallback fonts for now.
298    if (!family || character > 0xFFFF) {
299        int plane = character >> 16;
300        switch (plane) {
301        case 1:
302            family = L"code2001";
303            break;
304        case 2:
305            // Use a Traditional Chinese ExtB font if in Traditional Chinese locale.
306            // Otherwise, use a Simplified Chinese ExtB font. Windows Japanese
307            // fonts do support a small subset of ExtB (that are included in JIS X 0213),
308            // but its coverage is rather sparse.
309            // Eventually, this should be controlled by lang/xml:lang.
310            if (icu::Locale::getDefault() == icu::Locale::getTraditionalChinese())
311                family = L"pmingliu-extb";
312            else
313                family = L"simsun-extb";
314            break;
315        default:
316            family = L"lucida sans unicode";
317        }
318    }
319
320    if (scriptChecked)
321        *scriptChecked = script;
322    return family;
323}
324
325
326const UChar* getFallbackFamilyForFirstNonCommonCharacter(const UChar* characters,
327    int length,
328    FontDescription::GenericFamilyType generic)
329{
330    ASSERT(characters && characters[0] && length > 0);
331    UScriptCode script = USCRIPT_COMMON;
332
333    // Sometimes characters common to script (e.g. space) is at
334    // the beginning of a string so that we need to skip them
335    // to get a font required to render the string.
336    int i = 0;
337    UChar32 ucs4 = 0;
338    while (i < length && script == USCRIPT_COMMON) {
339        U16_NEXT(characters, i, length, ucs4);
340        script = getScript(ucs4);
341    }
342
343    const UChar* family = getFallbackFamily(ucs4, generic, 0);
344
345    return family;
346}
347
348} // namespace WebCore
349