1/* 2 * Copyright (c) 2006, 2007, 2008, 2009, 2010, 2012 Google Inc. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions are 6 * met: 7 * 8 * * Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * * Redistributions in binary form must reproduce the above 11 * copyright notice, this list of conditions and the following disclaimer 12 * in the documentation and/or other materials provided with the 13 * distribution. 14 * * Neither the name of Google Inc. nor the names of its 15 * contributors may be used to endorse or promote products derived from 16 * this software without specific prior written permission. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 */ 30 31#include "config.h" 32#include "platform/fonts/win/FontFallbackWin.h" 33 34#include "platform/win/HWndDC.h" 35#include "wtf/HashMap.h" 36#include "wtf/text/StringHash.h" 37#include "wtf/text/WTFString.h" 38#include <limits> 39#include <unicode/locid.h> 40#include <unicode/uchar.h> 41 42namespace WebCore { 43 44namespace { 45 46bool isFontPresent(const UChar* fontName) 47{ 48 HFONT hfont = CreateFont(12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, fontName); 49 if (!hfont) 50 return false; 51 HWndDC dc(0); 52 HGDIOBJ oldFont = static_cast<HFONT>(SelectObject(dc, hfont)); 53 WCHAR actualFontName[LF_FACESIZE]; 54 GetTextFace(dc, LF_FACESIZE, actualFontName); 55 actualFontName[LF_FACESIZE - 1] = 0; 56 SelectObject(dc, oldFont); 57 DeleteObject(hfont); 58 // We don't have to worry about East Asian fonts with locale-dependent 59 // names here for now. 60 // FIXME: Why not? 61 return !wcscmp(fontName, actualFontName); 62} 63 64// A simple mapping from UScriptCode to family name. This is a sparse array, 65// which works well since the range of UScriptCode values is small. 66typedef const UChar* ScriptToFontMap[USCRIPT_CODE_LIMIT]; 67 68void initializeScriptFontMap(ScriptToFontMap& scriptFontMap) 69{ 70 struct FontMap { 71 UScriptCode script; 72 const UChar* family; 73 }; 74 75 static const FontMap fontMap[] = { 76 {USCRIPT_LATIN, L"times new roman"}, 77 {USCRIPT_GREEK, L"times new roman"}, 78 {USCRIPT_CYRILLIC, L"times new roman"}, 79 // FIXME: Consider trying new Vista fonts before XP fonts for CJK. 80 // Some Vista users do want to use Vista cleartype CJK fonts. If we 81 // did, the results of tests with CJK characters would have to be 82 // regenerated for Vista. 83 {USCRIPT_SIMPLIFIED_HAN, L"simsun"}, 84 {USCRIPT_TRADITIONAL_HAN, L"pmingliu"}, 85 {USCRIPT_HIRAGANA, L"ms pgothic"}, 86 {USCRIPT_KATAKANA, L"ms pgothic"}, 87 {USCRIPT_KATAKANA_OR_HIRAGANA, L"ms pgothic"}, 88 {USCRIPT_HANGUL, L"gulim"}, 89 {USCRIPT_THAI, L"tahoma"}, 90 {USCRIPT_HEBREW, L"david"}, 91 {USCRIPT_ARABIC, L"tahoma"}, 92 {USCRIPT_DEVANAGARI, L"mangal"}, 93 {USCRIPT_BENGALI, L"vrinda"}, 94 {USCRIPT_GURMUKHI, L"raavi"}, 95 {USCRIPT_GUJARATI, L"shruti"}, 96 {USCRIPT_TAMIL, L"latha"}, 97 {USCRIPT_TELUGU, L"gautami"}, 98 {USCRIPT_KANNADA, L"tunga"}, 99 {USCRIPT_GEORGIAN, L"sylfaen"}, 100 {USCRIPT_ARMENIAN, L"sylfaen"}, 101 {USCRIPT_THAANA, L"mv boli"}, 102 {USCRIPT_CANADIAN_ABORIGINAL, L"euphemia"}, 103 {USCRIPT_CHEROKEE, L"plantagenet cherokee"}, 104 {USCRIPT_MONGOLIAN, L"mongolian balti"}, 105 // For USCRIPT_COMMON, we map blocks to scripts when 106 // that makes sense. 107 }; 108 109 struct ScriptToFontFamilies { 110 UScriptCode script; 111 const UChar** families; 112 }; 113 114 // Kartika on Vista or earlier lacks the support for Chillu 115 // letters added to Unicode 5.1. 116 // Try AnjaliOldLipi (a very widely used Malaylalam font with the full 117 // Unicode 5.x support) before falling back to Kartika. 118 static const UChar* malayalamFonts[] = {L"AnjaliOldLipi", L"Lohit Malayalam", L"Kartika", L"Rachana", 0}; 119 // Try Khmer OS before Vista fonts because 'Khmer OS' goes along better 120 // with Latin and looks better/larger for the same size. 121 static const UChar* khmerFonts[] = {L"Khmer OS", L"MoolBoran", L"DaunPenh", L"Code2000", 0}; 122 // For the following 6 scripts, two or fonts are listed. The fonts in 123 // the 1st slot are not available on Windows XP. To support these 124 // scripts on XP, listed in the rest of slots are widely used 125 // fonts. 126 static const UChar* ethiopicFonts[] = {L"Nyala", L"Abyssinica SIL", L"Ethiopia Jiret", L"Visual Geez Unicode", L"GF Zemen Unicode", 0}; 127 static const UChar* oriyaFonts[] = {L"Kalinga", L"ori1Uni", L"Lohit Oriya", 0}; 128 static const UChar* laoFonts[] = {L"DokChampa", L"Saysettha OT", L"Phetsarath OT", L"Code2000", 0}; 129 static const UChar* tibetanFonts[] = {L"Microsoft Himalaya", L"Jomolhari", L"Tibetan Machine Uni", 0}; 130 static const UChar* sinhalaFonts[] = {L"Iskoola Pota", L"AksharUnicode", 0}; 131 static const UChar* yiFonts[] = {L"Microsoft Yi Balti", L"Nuosu SIL", L"Code2000", 0}; 132 // http://www.bethmardutho.org/support/meltho/download/index.php 133 static const UChar* syriacFonts[] = {L"Estrangelo Edessa", L"Estrangelo Nisibin", L"Code2000", 0}; 134 // No Myanmar/Burmese font is shipped with Windows, yet. Try a few 135 // widely available/used ones that supports Unicode 5.1 or later. 136 static const UChar* myanmarFonts[] = {L"Padauk", L"Parabaik", L"Myanmar3", L"Code2000", 0}; 137 138 static const ScriptToFontFamilies scriptToFontFamilies[] = { 139 {USCRIPT_MALAYALAM, malayalamFonts}, 140 {USCRIPT_KHMER, khmerFonts}, 141 {USCRIPT_ETHIOPIC, ethiopicFonts}, 142 {USCRIPT_ORIYA, oriyaFonts}, 143 {USCRIPT_LAO, laoFonts}, 144 {USCRIPT_TIBETAN, tibetanFonts}, 145 {USCRIPT_SINHALA, sinhalaFonts}, 146 {USCRIPT_YI, yiFonts}, 147 {USCRIPT_SYRIAC, syriacFonts}, 148 {USCRIPT_MYANMAR, myanmarFonts}, 149 }; 150 151 for (size_t i = 0; i < WTF_ARRAY_LENGTH(fontMap); ++i) 152 scriptFontMap[fontMap[i].script] = fontMap[i].family; 153 154 // FIXME: Instead of scanning the hard-coded list, we have to 155 // use EnumFont* to 'inspect' fonts to pick up fonts covering scripts 156 // when it's possible (e.g. using OS/2 table). If we do that, this 157 // had better be pulled out of here. 158 for (size_t i = 0; i < WTF_ARRAY_LENGTH(scriptToFontFamilies); ++i) { 159 UScriptCode script = scriptToFontFamilies[i].script; 160 scriptFontMap[script] = 0; 161 const UChar** familyPtr = scriptToFontFamilies[i].families; 162 while (*familyPtr) { 163 if (isFontPresent(*familyPtr)) { 164 scriptFontMap[script] = *familyPtr; 165 break; 166 } 167 ++familyPtr; 168 } 169 } 170 171 // Initialize the locale-dependent mapping. 172 // Since Chrome synchronizes the ICU default locale with its UI locale, 173 // this ICU locale tells the current UI locale of Chrome. 174 icu::Locale locale = icu::Locale::getDefault(); 175 const UChar* localeFamily = 0; 176 if (locale == icu::Locale::getJapanese()) { 177 localeFamily = scriptFontMap[USCRIPT_HIRAGANA]; 178 } else if (locale == icu::Locale::getKorean()) { 179 localeFamily = scriptFontMap[USCRIPT_HANGUL]; 180 } else if (locale == icu::Locale::getTraditionalChinese()) { 181 localeFamily = scriptFontMap[USCRIPT_TRADITIONAL_HAN]; 182 } else { 183 // For other locales, use the simplified Chinese font for Han. 184 localeFamily = scriptFontMap[USCRIPT_SIMPLIFIED_HAN]; 185 } 186 if (localeFamily) 187 scriptFontMap[USCRIPT_HAN] = localeFamily; 188} 189 190// There are a lot of characters in USCRIPT_COMMON that can be covered 191// by fonts for scripts closely related to them. See 192// http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[:Script=Common:] 193// FIXME: make this more efficient with a wider coverage 194UScriptCode getScriptBasedOnUnicodeBlock(int ucs4) 195{ 196 UBlockCode block = ublock_getCode(ucs4); 197 switch (block) { 198 case UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION: 199 return USCRIPT_HAN; 200 case UBLOCK_HIRAGANA: 201 case UBLOCK_KATAKANA: 202 return USCRIPT_HIRAGANA; 203 case UBLOCK_ARABIC: 204 return USCRIPT_ARABIC; 205 case UBLOCK_THAI: 206 return USCRIPT_THAI; 207 case UBLOCK_GREEK: 208 return USCRIPT_GREEK; 209 case UBLOCK_DEVANAGARI: 210 // For Danda and Double Danda (U+0964, U+0965), use a Devanagari 211 // font for now although they're used by other scripts as well. 212 // Without a context, we can't do any better. 213 return USCRIPT_DEVANAGARI; 214 case UBLOCK_ARMENIAN: 215 return USCRIPT_ARMENIAN; 216 case UBLOCK_GEORGIAN: 217 return USCRIPT_GEORGIAN; 218 case UBLOCK_KANNADA: 219 return USCRIPT_KANNADA; 220 default: 221 return USCRIPT_COMMON; 222 } 223} 224 225UScriptCode getScript(int ucs4) 226{ 227 UErrorCode err = U_ZERO_ERROR; 228 UScriptCode script = uscript_getScript(ucs4, &err); 229 // If script is invalid, common or inherited or there's an error, 230 // infer a script based on the unicode block of a character. 231 if (script <= USCRIPT_INHERITED || U_FAILURE(err)) 232 script = getScriptBasedOnUnicodeBlock(ucs4); 233 return script; 234} 235 236} // namespace 237 238// FIXME: this is font fallback code version 0.1 239// - Cover all the scripts 240// - Get the default font for each script/generic family from the 241// preference instead of hardcoding in the source. 242// (at least, read values from the registry for IE font settings). 243// - Support generic families (from FontDescription) 244// - If the default font for a script is not available, 245// try some more fonts known to support it. Finally, we can 246// use EnumFontFamilies or similar APIs to come up with a list of 247// fonts supporting the script and cache the result. 248// - Consider using UnicodeSet (or UnicodeMap) converted from 249// GLYPHSET (BMP) or directly read from truetype cmap tables to 250// keep track of which character is supported by which font 251// - Update script_font_cache in response to WM_FONTCHANGE 252 253const UChar* getFontFamilyForScript(UScriptCode script, 254 FontDescription::GenericFamilyType generic) 255{ 256 static ScriptToFontMap scriptFontMap; 257 static bool initialized = false; 258 if (!initialized) { 259 initializeScriptFontMap(scriptFontMap); 260 initialized = true; 261 } 262 if (script == USCRIPT_INVALID_CODE) 263 return 0; 264 ASSERT(script < USCRIPT_CODE_LIMIT); 265 return scriptFontMap[script]; 266} 267 268// FIXME: 269// - Handle 'Inherited', 'Common' and 'Unknown' 270// (see http://www.unicode.org/reports/tr24/#Usage_Model ) 271// For 'Inherited' and 'Common', perhaps we need to 272// accept another parameter indicating the previous family 273// and just return it. 274// - All the characters (or characters up to the point a single 275// font can cover) need to be taken into account 276const UChar* getFallbackFamily(UChar32 character, 277 FontDescription::GenericFamilyType generic, 278 UScriptCode* scriptChecked) 279{ 280 ASSERT(character); 281 UScriptCode script = getScript(character); 282 283 // For the full-width ASCII characters (U+FF00 - U+FF5E), use the font for 284 // Han (determined in a locale-dependent way above). Full-width ASCII 285 // characters are rather widely used in Japanese and Chinese documents and 286 // they're fully covered by Chinese, Japanese and Korean fonts. 287 if (0xFF00 < character && character < 0xFF5F) 288 script = USCRIPT_HAN; 289 290 if (script == USCRIPT_COMMON) 291 script = getScriptBasedOnUnicodeBlock(character); 292 293 const UChar* family = getFontFamilyForScript(script, generic); 294 // Another lame work-around to cover non-BMP characters. 295 // If the font family for script is not found or the character is 296 // not in BMP (> U+FFFF), we resort to the hard-coded list of 297 // fallback fonts for now. 298 if (!family || character > 0xFFFF) { 299 int plane = character >> 16; 300 switch (plane) { 301 case 1: 302 family = L"code2001"; 303 break; 304 case 2: 305 // Use a Traditional Chinese ExtB font if in Traditional Chinese locale. 306 // Otherwise, use a Simplified Chinese ExtB font. Windows Japanese 307 // fonts do support a small subset of ExtB (that are included in JIS X 0213), 308 // but its coverage is rather sparse. 309 // Eventually, this should be controlled by lang/xml:lang. 310 if (icu::Locale::getDefault() == icu::Locale::getTraditionalChinese()) 311 family = L"pmingliu-extb"; 312 else 313 family = L"simsun-extb"; 314 break; 315 default: 316 family = L"lucida sans unicode"; 317 } 318 } 319 320 if (scriptChecked) 321 *scriptChecked = script; 322 return family; 323} 324 325 326const UChar* getFallbackFamilyForFirstNonCommonCharacter(const UChar* characters, 327 int length, 328 FontDescription::GenericFamilyType generic) 329{ 330 ASSERT(characters && characters[0] && length > 0); 331 UScriptCode script = USCRIPT_COMMON; 332 333 // Sometimes characters common to script (e.g. space) is at 334 // the beginning of a string so that we need to skip them 335 // to get a font required to render the string. 336 int i = 0; 337 UChar32 ucs4 = 0; 338 while (i < length && script == USCRIPT_COMMON) { 339 U16_NEXT(characters, i, length, ucs4); 340 script = getScript(ucs4); 341 } 342 343 const UChar* family = getFallbackFamily(ucs4, generic, 0); 344 345 return family; 346} 347 348} // namespace WebCore 349