1/*
2 * Copyright (c) 2006, 2007, 2008, 2009, 2010, Google Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are
6 * met:
7 *
8 *     * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 *     * Redistributions in binary form must reproduce the above
11 * copyright notice, this list of conditions and the following disclaimer
12 * in the documentation and/or other materials provided with the
13 * distribution.
14 *     * Neither the name of Google Inc. nor the names of its
15 * contributors may be used to endorse or promote products derived from
16 * this software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31#include "config.h"
32#include "FontUtilsChromiumWin.h"
33
34#include <limits>
35
36#include "PlatformString.h"
37#include "UniscribeHelper.h"
38#include <unicode/locid.h>
39#include <unicode/uchar.h>
40#include <wtf/HashMap.h>
41#include <wtf/text/StringHash.h>
42
43namespace WebCore {
44
45namespace {
46
47bool isFontPresent(const UChar* fontName)
48{
49    HFONT hfont = CreateFont(12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
50                             fontName);
51    if (!hfont)
52        return false;
53    HDC dc = GetDC(0);
54    HGDIOBJ oldFont = static_cast<HFONT>(SelectObject(dc, hfont));
55    WCHAR actualFontName[LF_FACESIZE];
56    GetTextFace(dc, LF_FACESIZE, actualFontName);
57    actualFontName[LF_FACESIZE - 1] = 0;
58    SelectObject(dc, oldFont);
59    DeleteObject(hfont);
60    ReleaseDC(0, dc);
61    // We don't have to worry about East Asian fonts with locale-dependent
62    // names here for now.
63    return !wcscmp(fontName, actualFontName);
64}
65
// A simple mapping from UScriptCode to family name.  This is a sparse array,
// which works well since the range of UScriptCode values is small.
// The array is indexed directly by UScriptCode value and has
// USCRIPT_CODE_LIMIT slots; a null entry means that no family is registered
// for that script.
typedef const UChar* ScriptToFontMap[USCRIPT_CODE_LIMIT];
69
70void initializeScriptFontMap(ScriptToFontMap& scriptFontMap)
71{
72    struct FontMap {
73        UScriptCode script;
74        const UChar* family;
75    };
76
77    static const FontMap fontMap[] = {
78        {USCRIPT_LATIN, L"times new roman"},
79        {USCRIPT_GREEK, L"times new roman"},
80        {USCRIPT_CYRILLIC, L"times new roman"},
81        // FIXME: Consider trying new Vista fonts before XP fonts for CJK.
82        // Some Vista users do want to use Vista cleartype CJK fonts. If we
83        // did, the results of tests with CJK characters would have to be
84        // regenerated for Vista.
85        {USCRIPT_SIMPLIFIED_HAN, L"simsun"},
86        {USCRIPT_TRADITIONAL_HAN, L"pmingliu"},
87        {USCRIPT_HIRAGANA, L"ms pgothic"},
88        {USCRIPT_KATAKANA, L"ms pgothic"},
89        {USCRIPT_KATAKANA_OR_HIRAGANA, L"ms pgothic"},
90        {USCRIPT_HANGUL, L"gulim"},
91        {USCRIPT_THAI, L"tahoma"},
92        {USCRIPT_HEBREW, L"david"},
93        {USCRIPT_ARABIC, L"tahoma"},
94        {USCRIPT_DEVANAGARI, L"mangal"},
95        {USCRIPT_BENGALI, L"vrinda"},
96        {USCRIPT_GURMUKHI, L"raavi"},
97        {USCRIPT_GUJARATI, L"shruti"},
98        {USCRIPT_TAMIL, L"latha"},
99        {USCRIPT_TELUGU, L"gautami"},
100        {USCRIPT_KANNADA, L"tunga"},
101        {USCRIPT_GEORGIAN, L"sylfaen"},
102        {USCRIPT_ARMENIAN, L"sylfaen"},
103        {USCRIPT_THAANA, L"mv boli"},
104        {USCRIPT_CANADIAN_ABORIGINAL, L"euphemia"},
105        {USCRIPT_CHEROKEE, L"plantagenet cherokee"},
106        {USCRIPT_MONGOLIAN, L"mongolian balti"},
107        // For USCRIPT_COMMON, we map blocks to scripts when
108        // that makes sense.
109    };
110
111    struct ScriptToFontFamilies {
112        UScriptCode script;
113        const UChar** families;
114    };
115
116    // Kartika on Vista or earlier lacks the support for Chillu
117    // letters added to Unicode 5.1.
118    // Try AnjaliOldLipi (a very widely used Malaylalam font with the full
119    // Unicode 5.x support) before falling back to Kartika.
120    static const UChar* malayalamFonts[] = {L"AnjaliOldLipi", L"Lohit Malayalam", L"Kartika", L"Rachana", 0};
121    // Try Khmer OS before Vista fonts because 'Khmer OS' goes along better
122    // with Latin and looks better/larger for the same size.
123    static const UChar* khmerFonts[] = {L"Khmer OS", L"MoolBoran", L"DaunPenh", L"Code2000", 0};
124    // For the following 6 scripts, two or fonts are listed. The fonts in
125    // the 1st slot are not available on Windows XP. To support these
126    // scripts on XP, listed in the rest of slots are widely used
127    // fonts.
128    static const UChar* ethiopicFonts[] = {L"Nyala", L"Abyssinica SIL", L"Ethiopia Jiret", L"Visual Geez Unicode", L"GF Zemen Unicode", 0};
129    static const UChar* oriyaFonts[] = {L"Kalinga", L"ori1Uni", L"Lohit Oriya", 0};
130    static const UChar* laoFonts[] = {L"DokChampa", L"Saysettha OT", L"Phetsarath OT", L"Code2000", 0};
131    static const UChar* tibetanFonts[] = {L"Microsoft Himalaya", L"Jomolhari", L"Tibetan Machine Uni", 0};
132    static const UChar* sinhalaFonts[] = {L"Iskoola Pota", L"AksharUnicode", 0};
133    static const UChar* yiFonts[] = {L"Microsoft Yi Balti", L"Nuosu SIL", L"Code2000", 0};
134    // http://www.bethmardutho.org/support/meltho/download/index.php
135    static const UChar* syriacFonts[] = {L"Estrangelo Edessa", L"Estrangelo Nisibin", L"Code2000", 0};
136    // No Myanmar/Burmese font is shipped with Windows, yet. Try a few
137    // widely available/used ones that supports Unicode 5.1 or later.
138    static const UChar* myanmarFonts[] = {L"Padauk", L"Parabaik", L"Myanmar3", L"Code2000", 0};
139
140    static const ScriptToFontFamilies scriptToFontFamilies[] = {
141        {USCRIPT_MALAYALAM, malayalamFonts},
142        {USCRIPT_KHMER, khmerFonts},
143        {USCRIPT_ETHIOPIC, ethiopicFonts},
144        {USCRIPT_ORIYA, oriyaFonts},
145        {USCRIPT_LAO, laoFonts},
146        {USCRIPT_TIBETAN, tibetanFonts},
147        {USCRIPT_SINHALA, sinhalaFonts},
148        {USCRIPT_YI, yiFonts},
149        {USCRIPT_SYRIAC, syriacFonts},
150        {USCRIPT_MYANMAR, myanmarFonts},
151    };
152
153    for (size_t i = 0; i < WTF_ARRAY_LENGTH(fontMap); ++i)
154        scriptFontMap[fontMap[i].script] = fontMap[i].family;
155
156    // FIXME: Instead of scanning the hard-coded list, we have to
157    // use EnumFont* to 'inspect' fonts to pick up fonts covering scripts
158    // when it's possible (e.g. using OS/2 table). If we do that, this
159    // had better be pulled out of here.
160    for (size_t i = 0; i < WTF_ARRAY_LENGTH(scriptToFontFamilies); ++i) {
161        UScriptCode script = scriptToFontFamilies[i].script;
162        scriptFontMap[script] = 0;
163        const UChar** familyPtr = scriptToFontFamilies[i].families;
164        while (*familyPtr) {
165            if (isFontPresent(*familyPtr)) {
166                scriptFontMap[script] = *familyPtr;
167                break;
168            }
169            ++familyPtr;
170        }
171    }
172
173    // Initialize the locale-dependent mapping.
174    // Since Chrome synchronizes the ICU default locale with its UI locale,
175    // this ICU locale tells the current UI locale of Chrome.
176    icu::Locale locale = icu::Locale::getDefault();
177    const UChar* localeFamily = 0;
178    if (locale == icu::Locale::getJapanese())
179        localeFamily = scriptFontMap[USCRIPT_HIRAGANA];
180    else if (locale == icu::Locale::getKorean())
181        localeFamily = scriptFontMap[USCRIPT_HANGUL];
182    else if (locale == icu::Locale::getTraditionalChinese())
183        localeFamily = scriptFontMap[USCRIPT_TRADITIONAL_HAN];
184    else {
185        // For other locales, use the simplified Chinese font for Han.
186        localeFamily = scriptFontMap[USCRIPT_SIMPLIFIED_HAN];
187    }
188    if (localeFamily)
189        scriptFontMap[USCRIPT_HAN] = localeFamily;
190}
191
192// There are a lot of characters in USCRIPT_COMMON that can be covered
193// by fonts for scripts closely related to them. See
194// http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[:Script=Common:]
195// FIXME: make this more efficient with a wider coverage
196UScriptCode getScriptBasedOnUnicodeBlock(int ucs4)
197{
198    UBlockCode block = ublock_getCode(ucs4);
199    switch (block) {
200    case UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION:
201        return USCRIPT_HAN;
202    case UBLOCK_HIRAGANA:
203    case UBLOCK_KATAKANA:
204        return USCRIPT_HIRAGANA;
205    case UBLOCK_ARABIC:
206        return USCRIPT_ARABIC;
207    case UBLOCK_THAI:
208        return USCRIPT_THAI;
209    case UBLOCK_GREEK:
210        return USCRIPT_GREEK;
211    case UBLOCK_DEVANAGARI:
212        // For Danda and Double Danda (U+0964, U+0965), use a Devanagari
213        // font for now although they're used by other scripts as well.
214        // Without a context, we can't do any better.
215        return USCRIPT_DEVANAGARI;
216    case UBLOCK_ARMENIAN:
217        return USCRIPT_ARMENIAN;
218    case UBLOCK_GEORGIAN:
219        return USCRIPT_GEORGIAN;
220    case UBLOCK_KANNADA:
221        return USCRIPT_KANNADA;
222    default:
223        return USCRIPT_COMMON;
224    }
225}
226
227UScriptCode getScript(int ucs4)
228{
229    UErrorCode err = U_ZERO_ERROR;
230    UScriptCode script = uscript_getScript(ucs4, &err);
231    // If script is invalid, common or inherited or there's an error,
232    // infer a script based on the unicode block of a character.
233    if (script <= USCRIPT_INHERITED || U_FAILURE(err))
234        script = getScriptBasedOnUnicodeBlock(ucs4);
235    return script;
236}
237
// Sentinel (the smallest int) standing for "the ascent is not known".
const int kUndefinedAscent = std::numeric_limits<int>::min();
239
240// Given an HFONT, return the ascent. If GetTextMetrics fails,
241// kUndefinedAscent is returned, instead.
242int getAscent(HFONT hfont)
243{
244    HDC dc = GetDC(0);
245    HGDIOBJ oldFont = SelectObject(dc, hfont);
246    TEXTMETRIC tm;
247    BOOL gotMetrics = GetTextMetrics(dc, &tm);
248    SelectObject(dc, oldFont);
249    ReleaseDC(0, dc);
250    return gotMetrics ? tm.tmAscent : kUndefinedAscent;
251}
252
253WORD getSpaceGlyph(HFONT hfont)
254{
255    HDC dc = GetDC(0);
256    HGDIOBJ oldFont = SelectObject(dc, hfont);
257    WCHAR space = L' ';
258    WORD spaceGlyph = 0;
259    GetGlyphIndices(dc, &space, 1, &spaceGlyph, 0);
260    SelectObject(dc, oldFont);
261    ReleaseDC(0, dc);
262    return spaceGlyph;
263}
264
265struct FontData {
266    FontData()
267        : hfont(0)
268        , ascent(kUndefinedAscent)
269        , scriptCache(0)
270        , spaceGlyph(0)
271    {
272    }
273
274    HFONT hfont;
275    int ascent;
276    mutable SCRIPT_CACHE scriptCache;
277    WORD spaceGlyph;
278};
279
// Again, using hash_map does not earn us much here.  page_cycler_test intl2
// gave us a 'better' result with map than with hash_map even though they're
// well-within 1-sigma of each other so that the difference is not significant.
// On the other hand, some pages in intl2 seem to take longer to load with map
// in the 1st pass. Need to experiment further.
//
// Cache of derived font data. getDerivedFontData() keys it with a string
// built from the style, the LOGFONT height and the family name.
typedef HashMap<String, FontData> FontDataCache;
286
287} // namespace
288
289// FIXME: this is font fallback code version 0.1
290//  - Cover all the scripts
291//  - Get the default font for each script/generic family from the
292//    preference instead of hardcoding in the source.
293//    (at least, read values from the registry for IE font settings).
294//  - Support generic families (from FontDescription)
295//  - If the default font for a script is not available,
296//    try some more fonts known to support it. Finally, we can
297//    use EnumFontFamilies or similar APIs to come up with a list of
298//    fonts supporting the script and cache the result.
299//  - Consider using UnicodeSet (or UnicodeMap) converted from
300//    GLYPHSET (BMP) or directly read from truetype cmap tables to
301//    keep track of which character is supported by which font
302//  - Update script_font_cache in response to WM_FONTCHANGE
303
304const UChar* getFontFamilyForScript(UScriptCode script,
305                                    FontDescription::GenericFamilyType generic)
306{
307    static ScriptToFontMap scriptFontMap;
308    static bool initialized = false;
309    if (!initialized) {
310        initializeScriptFontMap(scriptFontMap);
311        initialized = true;
312    }
313    if (script == USCRIPT_INVALID_CODE)
314        return 0;
315    ASSERT(script < USCRIPT_CODE_LIMIT);
316    return scriptFontMap[script];
317}
318
319// FIXME:
320//  - Handle 'Inherited', 'Common' and 'Unknown'
321//    (see http://www.unicode.org/reports/tr24/#Usage_Model )
322//    For 'Inherited' and 'Common', perhaps we need to
323//    accept another parameter indicating the previous family
324//    and just return it.
325//  - All the characters (or characters up to the point a single
326//    font can cover) need to be taken into account
327const UChar* getFallbackFamily(const UChar* characters,
328                               int length,
329                               FontDescription::GenericFamilyType generic,
330                               UChar32* charChecked,
331                               UScriptCode* scriptChecked)
332{
333    ASSERT(characters && characters[0] && length > 0);
334    UScriptCode script = USCRIPT_COMMON;
335
336    // Sometimes characters common to script (e.g. space) is at
337    // the beginning of a string so that we need to skip them
338    // to get a font required to render the string.
339    int i = 0;
340    UChar32 ucs4 = 0;
341    while (i < length && script == USCRIPT_COMMON) {
342        U16_NEXT(characters, i, length, ucs4);
343        script = getScript(ucs4);
344    }
345
346    // For the full-width ASCII characters (U+FF00 - U+FF5E), use the font for
347    // Han (determined in a locale-dependent way above). Full-width ASCII
348    // characters are rather widely used in Japanese and Chinese documents and
349    // they're fully covered by Chinese, Japanese and Korean fonts.
350    if (0xFF00 < ucs4 && ucs4 < 0xFF5F)
351        script = USCRIPT_HAN;
352
353    if (script == USCRIPT_COMMON)
354        script = getScriptBasedOnUnicodeBlock(ucs4);
355
356    const UChar* family = getFontFamilyForScript(script, generic);
357    // Another lame work-around to cover non-BMP characters.
358    // If the font family for script is not found or the character is
359    // not in BMP (> U+FFFF), we resort to the hard-coded list of
360    // fallback fonts for now.
361    if (!family || ucs4 > 0xFFFF) {
362        int plane = ucs4 >> 16;
363        switch (plane) {
364        case 1:
365            family = L"code2001";
366            break;
367        case 2:
368            // Use a Traditional Chinese ExtB font if in Traditional Chinese locale.
369            // Otherwise, use a Simplified Chinese ExtB font. Windows Japanese
370            // fonts do support a small subset of ExtB (that are included in JIS X 0213),
371            // but its coverage is rather sparse.
372            // Eventually, this should be controlled by lang/xml:lang.
373            if (icu::Locale::getDefault() == icu::Locale::getTraditionalChinese())
374              family = L"pmingliu-extb";
375            else
376              family = L"simsun-extb";
377            break;
378        default:
379            family = L"lucida sans unicode";
380        }
381    }
382
383    if (charChecked)
384        *charChecked = ucs4;
385    if (scriptChecked)
386        *scriptChecked = script;
387    return family;
388}
389
390// Be aware that this is not thread-safe.
391bool getDerivedFontData(const UChar* family,
392                        int style,
393                        LOGFONT* logfont,
394                        int* ascent,
395                        HFONT* hfont,
396                        SCRIPT_CACHE** scriptCache,
397                        WORD* spaceGlyph)
398{
399    ASSERT(logfont);
400    ASSERT(family);
401    ASSERT(*family);
402
403    // It does not matter that we leak font data when we exit.
404    static FontDataCache fontDataCache;
405
406    // FIXME: This comes up pretty high in the profile so that
407    // we need to measure whether using SHA256 (after coercing all the
408    // fields to char*) is faster than String::format.
409    String fontKey = String::format("%1d:%d:%ls", style, logfont->lfHeight, family);
410    FontDataCache::iterator iter = fontDataCache.find(fontKey);
411    FontData* derived;
412    if (iter == fontDataCache.end()) {
413        ASSERT(wcslen(family) < LF_FACESIZE);
414        wcscpy_s(logfont->lfFaceName, LF_FACESIZE, family);
415        // FIXME: CreateFontIndirect always comes up with
416        // a font even if there's no font matching the name. Need to
417        // check it against what we actually want (as is done in
418        // FontCacheWin.cpp)
419        pair<FontDataCache::iterator, bool> entry = fontDataCache.add(fontKey, FontData());
420        derived = &entry.first->second;
421        derived->hfont = CreateFontIndirect(logfont);
422        // GetAscent may return kUndefinedAscent, but we still want to
423        // cache it so that we won't have to call CreateFontIndirect once
424        // more for HFONT next time.
425        derived->ascent = getAscent(derived->hfont);
426        derived->spaceGlyph = getSpaceGlyph(derived->hfont);
427    } else {
428        derived = &iter->second;
429        // Last time, GetAscent failed so that only HFONT was
430        // cached. Try once more assuming that TryPreloadFont
431        // was called by a caller between calls.
432        if (kUndefinedAscent == derived->ascent)
433            derived->ascent = getAscent(derived->hfont);
434    }
435    *hfont = derived->hfont;
436    *ascent = derived->ascent;
437    *scriptCache = &(derived->scriptCache);
438    *spaceGlyph = derived->spaceGlyph;
439    return *ascent != kUndefinedAscent;
440}
441
442int getStyleFromLogfont(const LOGFONT* logfont)
443{
444    // FIXME: consider defining UNDEFINED or INVALID for style and
445    //                  returning it when logfont is 0
446    if (!logfont) {
447        ASSERT_NOT_REACHED();
448        return FontStyleNormal;
449    }
450    return (logfont->lfItalic ? FontStyleItalic : FontStyleNormal) |
451           (logfont->lfUnderline ? FontStyleUnderlined : FontStyleNormal) |
452           (logfont->lfWeight >= 700 ? FontStyleBold : FontStyleNormal);
453}
454
455} // namespace WebCore
456