1/* 2 * Copyright (C) 2013 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17// #define VERBOSE_DEBUG 18 19#define LOG_TAG "Minikin" 20#include <cutils/log.h> 21#include <algorithm> 22 23#include "unicode/unistr.h" 24#include "unicode/unorm2.h" 25 26#include "FontLanguage.h" 27#include "FontLanguageListCache.h" 28#include "MinikinInternal.h" 29#include <minikin/FontCollection.h> 30 31using std::vector; 32 33namespace android { 34 35template <typename T> 36static inline T max(T a, T b) { 37 return a>b ? a : b; 38} 39 40const uint32_t EMOJI_STYLE_VS = 0xFE0F; 41const uint32_t TEXT_STYLE_VS = 0xFE0E; 42 43// See http://www.unicode.org/Public/9.0.0/ucd/StandardizedVariants-9.0.0d1.txt 44// Must be sorted. 45const uint32_t EMOJI_STYLE_VS_BASES[] = { 46 0x0023, 0x002A, 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 47 0x00A9, 0x00AE, 0x203C, 0x2049, 0x2122, 0x2139, 0x2194, 0x2195, 0x2196, 0x2197, 0x2198, 0x2199, 48 0x21A9, 0x21AA, 0x231A, 0x231B, 0x2328, 0x23CF, 0x23ED, 0x23EE, 0x23EF, 0x23F1, 0x23F2, 0x23F8, 49 0x23F9, 0x23FA, 0x24C2, 0x25AA, 0x25AB, 0x25B6, 0x25C0, 0x25FB, 0x25FC, 0x25FD, 0x25FE, 0x2600, 50 0x2601, 0x2602, 0x2603, 0x2604, 0x260E, 0x2611, 0x2614, 0x2615, 0x2618, 0x261D, 0x2620, 0x2622, 51 0x2623, 0x2626, 0x262A, 0x262E, 0x262F, 0x2638, 0x2639, 0x263A, 0x2648, 0x2649, 0x264A, 0x264B, 52 0x264C, 0x264D, 0x264E, 0x264F, 0x2650, 0x2651, 0x2652, 0x2653, 0x2660, 0x2663, 0x2665, 0x2666, 53 0x2668, 0x267B, 0x267F, 0x2692, 0x2693, 0x2694, 0x2696, 0x2697, 0x2699, 0x269B, 0x269C, 0x26A0, 54 0x26A1, 0x26AA, 0x26AB, 0x26B0, 0x26B1, 0x26BD, 0x26BE, 0x26C4, 0x26C5, 0x26C8, 0x26CF, 0x26D1, 55 0x26D3, 0x26D4, 0x26E9, 0x26EA, 0x26F0, 0x26F1, 0x26F2, 0x26F3, 0x26F4, 0x26F5, 0x26F7, 0x26F8, 56 0x26F9, 0x26FA, 0x26FD, 0x2702, 0x2708, 0x2709, 0x270C, 0x270D, 0x270F, 0x2712, 0x2714, 0x2716, 57 0x271D, 0x2721, 0x2733, 0x2734, 0x2744, 0x2747, 0x2757, 0x2763, 0x2764, 0x27A1, 0x2934, 0x2935, 58 0x2B05, 0x2B06, 0x2B07, 0x2B1B, 0x2B1C, 0x2B50, 0x2B55, 0x3030, 0x303D, 0x3297, 0x3299, 59 0x1F004, 0x1F170, 0x1F171, 0x1F17E, 0x1F17F, 0x1F202, 0x1F21A, 0x1F22F, 0x1F237, 0x1F321, 60 0x1F324, 0x1F325, 0x1F326, 0x1F327, 0x1F328, 0x1F329, 0x1F32A, 0x1F32B, 0x1F32C, 0x1F336, 61 0x1F37D, 0x1F396, 0x1F397, 0x1F399, 0x1F39A, 0x1F39B, 0x1F39E, 0x1F39F, 0x1F3CB, 0x1F3CC, 62 0x1F3CD, 0x1F3CE, 0x1F3D4, 0x1F3D5, 0x1F3D6, 0x1F3D7, 0x1F3D8, 0x1F3D9, 0x1F3DA, 0x1F3DB, 63 0x1F3DC, 0x1F3DD, 0x1F3DE, 0x1F3DF, 0x1F3F3, 0x1F3F5, 0x1F3F7, 0x1F43F, 0x1F441, 0x1F4FD, 64 0x1F549, 0x1F54A, 0x1F56F, 0x1F570, 0x1F573, 0x1F574, 0x1F575, 0x1F576, 0x1F577, 0x1F578, 65 0x1F579, 0x1F587, 0x1F58A, 0x1F58B, 0x1F58C, 0x1F58D, 0x1F590, 0x1F5A5, 0x1F5A8, 0x1F5B1, 66 0x1F5B2, 0x1F5BC, 0x1F5C2, 0x1F5C3, 0x1F5C4, 0x1F5D1, 0x1F5D2, 0x1F5D3, 0x1F5DC, 0x1F5DD, 67 0x1F5DE, 0x1F5E1, 0x1F5E3, 0x1F5E8, 0x1F5EF, 0x1F5F3, 0x1F5FA, 0x1F6CB, 0x1F6CD, 0x1F6CE, 68 0x1F6CF, 0x1F6E0, 0x1F6E1, 0x1F6E2, 0x1F6E3, 0x1F6E4, 0x1F6E5, 0x1F6E9, 0x1F6F0, 0x1F6F3, 69}; 70 71static bool isEmojiStyleVSBase(uint32_t cp) { 72 const size_t length = sizeof(EMOJI_STYLE_VS_BASES) / sizeof(EMOJI_STYLE_VS_BASES[0]); 73 return std::binary_search(EMOJI_STYLE_VS_BASES, EMOJI_STYLE_VS_BASES + length, cp); 74} 75 76uint32_t FontCollection::sNextId = 0; 77 78FontCollection::FontCollection(const vector<FontFamily*>& typefaces) : 79 mMaxChar(0) { 80 AutoMutex _l(gMinikinLock); 81 mId = sNextId++; 82 vector<uint32_t> lastChar; 83 size_t nTypefaces = typefaces.size(); 84#ifdef VERBOSE_DEBUG 85 ALOGD("nTypefaces = %zd\n", nTypefaces); 86#endif 87 const FontStyle defaultStyle; 88 for (size_t i = 0; i < nTypefaces; i++) { 89 FontFamily* family = typefaces[i]; 90 MinikinFont* typeface = family->getClosestMatch(defaultStyle).font; 91 if (typeface == NULL) { 92 continue; 93 } 94 family->RefLocked(); 95 const SparseBitSet* coverage = family->getCoverage(); 96 if (coverage == nullptr) { 97 family->UnrefLocked(); 98 continue; 99 } 100 mFamilies.push_back(family); // emplace_back would be better 101 if (family->hasVSTable()) { 102 mVSFamilyVec.push_back(family); 103 } 104 mMaxChar = max(mMaxChar, coverage->length()); 105 lastChar.push_back(coverage->nextSetBit(0)); 106 } 107 nTypefaces = mFamilies.size(); 108 LOG_ALWAYS_FATAL_IF(nTypefaces == 0, 109 "Font collection must have at least one valid typeface"); 110 size_t nPages = (mMaxChar + kPageMask) >> kLogCharsPerPage; 111 size_t offset = 0; 112 // TODO: Use variation selector map for mRanges construction. 113 // A font can have a glyph for a base code point and variation selector pair but no glyph for 114 // the base code point without variation selector. The family won't be listed in the range in 115 // this case. 116 for (size_t i = 0; i < nPages; i++) { 117 Range dummy; 118 mRanges.push_back(dummy); 119 Range* range = &mRanges.back(); 120#ifdef VERBOSE_DEBUG 121 ALOGD("i=%zd: range start = %zd\n", i, offset); 122#endif 123 range->start = offset; 124 for (size_t j = 0; j < nTypefaces; j++) { 125 if (lastChar[j] < (i + 1) << kLogCharsPerPage) { 126 FontFamily* family = mFamilies[j]; 127 mFamilyVec.push_back(family); 128 offset++; 129 uint32_t nextChar = family->getCoverage()->nextSetBit((i + 1) << kLogCharsPerPage); 130#ifdef VERBOSE_DEBUG 131 ALOGD("nextChar = %d (j = %zd)\n", nextChar, j); 132#endif 133 lastChar[j] = nextChar; 134 } 135 } 136 range->end = offset; 137 } 138} 139 140FontCollection::~FontCollection() { 141 for (size_t i = 0; i < mFamilies.size(); i++) { 142 mFamilies[i]->UnrefLocked(); 143 } 144} 145 146// Special scores for the font fallback. 147const uint32_t kUnsupportedFontScore = 0; 148const uint32_t kFirstFontScore = UINT32_MAX; 149 150// Calculates a font score. 151// The score of the font family is based on three subscores. 152// - Coverage Score: How well the font family covers the given character or variation sequence. 153// - Language Score: How well the font family is appropriate for the language. 154// - Variant Score: Whether the font family matches the variant. Note that this variant is not the 155// one in BCP47. This is our own font variant (e.g., elegant, compact). 156// 157// Then, there is a priority for these three subscores as follow: 158// Coverage Score > Language Score > Variant Score 159// The returned score reflects this priority order. 160// 161// Note that there are two special scores. 162// - kUnsupportedFontScore: When the font family doesn't support the variation sequence or even its 163// base character. 164// - kFirstFontScore: When the font is the first font family in the collection and it supports the 165// given character or variation sequence. 166uint32_t FontCollection::calcFamilyScore(uint32_t ch, uint32_t vs, int variant, uint32_t langListId, 167 FontFamily* fontFamily) const { 168 169 const uint32_t coverageScore = calcCoverageScore(ch, vs, fontFamily); 170 if (coverageScore == kFirstFontScore || coverageScore == kUnsupportedFontScore) { 171 // No need to calculate other scores. 172 return coverageScore; 173 } 174 175 const uint32_t languageScore = calcLanguageMatchingScore(langListId, *fontFamily); 176 const uint32_t variantScore = calcVariantMatchingScore(variant, *fontFamily); 177 178 // Subscores are encoded into 31 bits representation to meet the subscore priority. 179 // The highest 2 bits are for coverage score, then following 28 bits are for language score, 180 // then the last 1 bit is for variant score. 181 return coverageScore << 29 | languageScore << 1 | variantScore; 182} 183 184// Calculates a font score based on variation sequence coverage. 185// - Returns kUnsupportedFontScore if the font doesn't support the variation sequence or its base 186// character. 187// - Returns kFirstFontScore if the font family is the first font family in the collection and it 188// supports the given character or variation sequence. 189// - Returns 3 if the font family supports the variation sequence. 190// - Returns 2 if the vs is a color variation selector (U+FE0F) and if the font is an emoji font. 191// - Returns 2 if the vs is a text variation selector (U+FE0E) and if the font is not an emoji font. 192// - Returns 1 if the variation selector is not specified or if the font family only supports the 193// variation sequence's base character. 194uint32_t FontCollection::calcCoverageScore(uint32_t ch, uint32_t vs, FontFamily* fontFamily) const { 195 const bool hasVSGlyph = (vs != 0) && fontFamily->hasGlyph(ch, vs); 196 if (!hasVSGlyph && !fontFamily->getCoverage()->get(ch)) { 197 // The font doesn't support either variation sequence or even the base character. 198 return kUnsupportedFontScore; 199 } 200 201 if ((vs == 0 || hasVSGlyph) && mFamilies[0] == fontFamily) { 202 // If the first font family supports the given character or variation sequence, always use 203 // it. 204 return kFirstFontScore; 205 } 206 207 if (vs == 0) { 208 return 1; 209 } 210 211 if (hasVSGlyph) { 212 return 3; 213 } 214 215 if (vs == EMOJI_STYLE_VS || vs == TEXT_STYLE_VS) { 216 const FontLanguages& langs = FontLanguageListCache::getById(fontFamily->langId()); 217 bool hasEmojiFlag = false; 218 for (size_t i = 0; i < langs.size(); ++i) { 219 if (langs[i].hasEmojiFlag()) { 220 hasEmojiFlag = true; 221 break; 222 } 223 } 224 225 if (vs == EMOJI_STYLE_VS) { 226 return hasEmojiFlag ? 2 : 1; 227 } else { // vs == TEXT_STYLE_VS 228 return hasEmojiFlag ? 1 : 2; 229 } 230 } 231 return 1; 232} 233 234// Calculates font scores based on the script matching and primary langauge matching. 235// 236// If the font's script doesn't support the requested script, the font gets a score of 0. If the 237// font's script supports the requested script and the font has the same primary language as the 238// requested one, the font gets a score of 2. If the font's script supports the requested script 239// but the primary language is different from the requested one, the font gets a score of 1. 240// 241// If two languages in the requested list have the same language score, the font matching with 242// higher priority language gets a higher score. For example, in the case the user requested 243// language list is "ja-Jpan,en-Latn". The score of for the font of "ja-Jpan" gets a higher score 244// than the font of "en-Latn". 245// 246// To achieve the above two conditions, the language score is determined as follows: 247// LanguageScore = s(0) * 3^(m - 1) + s(1) * 3^(m - 2) + ... + s(m - 2) * 3 + s(m - 1) 248// Here, m is the maximum number of languages to be compared, and s(i) is the i-th language's 249// matching score. The possible values of s(i) are 0, 1 and 2. 250uint32_t FontCollection::calcLanguageMatchingScore( 251 uint32_t userLangListId, const FontFamily& fontFamily) { 252 const FontLanguages& langList = FontLanguageListCache::getById(userLangListId); 253 const FontLanguages& fontLanguages = FontLanguageListCache::getById(fontFamily.langId()); 254 255 const size_t maxCompareNum = std::min(langList.size(), FONT_LANGUAGES_LIMIT); 256 uint32_t score = 0; 257 for (size_t i = 0; i < maxCompareNum; ++i) { 258 score = score * 3u + langList[i].calcScoreFor(fontLanguages); 259 } 260 return score; 261} 262 263// Calculates a font score based on variant ("compact" or "elegant") matching. 264// - Returns 1 if the font doesn't have variant or the variant matches with the text style. 265// - No score if the font has a variant but it doesn't match with the text style. 266uint32_t FontCollection::calcVariantMatchingScore(int variant, const FontFamily& fontFamily) { 267 return (fontFamily.variant() == 0 || fontFamily.variant() == variant) ? 1 : 0; 268} 269 270// Implement heuristic for choosing best-match font. Here are the rules: 271// 1. If first font in the collection has the character, it wins. 272// 2. Calculate a score for the font family. See comments in calcFamilyScore for the detail. 273// 3. Highest score wins, with ties resolved to the first font. 274// This method never returns nullptr. 275FontFamily* FontCollection::getFamilyForChar(uint32_t ch, uint32_t vs, 276 uint32_t langListId, int variant) const { 277 if (ch >= mMaxChar) { 278 return mFamilies[0]; 279 } 280 281 const std::vector<FontFamily*>* familyVec = &mFamilyVec; 282 Range range = mRanges[ch >> kLogCharsPerPage]; 283 284 std::vector<FontFamily*> familyVecForVS; 285 if (vs != 0) { 286 // If variation selector is specified, need to search for both the variation sequence and 287 // its base codepoint. Compute the union vector of them. 288 familyVecForVS = mVSFamilyVec; 289 familyVecForVS.insert(familyVecForVS.end(), 290 mFamilyVec.begin() + range.start, mFamilyVec.begin() + range.end); 291 std::sort(familyVecForVS.begin(), familyVecForVS.end()); 292 auto last = std::unique(familyVecForVS.begin(), familyVecForVS.end()); 293 familyVecForVS.erase(last, familyVecForVS.end()); 294 295 familyVec = &familyVecForVS; 296 range = { 0, familyVecForVS.size() }; 297 } 298 299#ifdef VERBOSE_DEBUG 300 ALOGD("querying range %zd:%zd\n", range.start, range.end); 301#endif 302 FontFamily* bestFamily = nullptr; 303 uint32_t bestScore = kUnsupportedFontScore; 304 for (size_t i = range.start; i < range.end; i++) { 305 FontFamily* family = (*familyVec)[i]; 306 const uint32_t score = calcFamilyScore(ch, vs, variant, langListId, family); 307 if (score == kFirstFontScore) { 308 // If the first font family supports the given character or variation sequence, always 309 // use it. 310 return family; 311 } 312 if (score > bestScore) { 313 bestScore = score; 314 bestFamily = family; 315 } 316 } 317 if (bestFamily == nullptr) { 318 UErrorCode errorCode = U_ZERO_ERROR; 319 const UNormalizer2* normalizer = unorm2_getNFDInstance(&errorCode); 320 if (U_SUCCESS(errorCode)) { 321 UChar decomposed[4]; 322 int len = unorm2_getRawDecomposition(normalizer, ch, decomposed, 4, &errorCode); 323 if (U_SUCCESS(errorCode) && len > 0) { 324 int off = 0; 325 U16_NEXT_UNSAFE(decomposed, off, ch); 326 return getFamilyForChar(ch, vs, langListId, variant); 327 } 328 } 329 bestFamily = mFamilies[0]; 330 } 331 return bestFamily; 332} 333 334const uint32_t NBSP = 0xa0; 335const uint32_t ZWJ = 0x200c; 336const uint32_t ZWNJ = 0x200d; 337const uint32_t HYPHEN = 0x2010; 338const uint32_t NB_HYPHEN = 0x2011; 339 340// Characters where we want to continue using existing font run instead of 341// recomputing the best match in the fallback list. 342static const uint32_t stickyWhitelist[] = { '!', ',', '-', '.', ':', ';', '?', NBSP, ZWJ, ZWNJ, 343 HYPHEN, NB_HYPHEN }; 344 345static bool isStickyWhitelisted(uint32_t c) { 346 for (size_t i = 0; i < sizeof(stickyWhitelist) / sizeof(stickyWhitelist[0]); i++) { 347 if (stickyWhitelist[i] == c) return true; 348 } 349 return false; 350} 351 352static bool isVariationSelector(uint32_t c) { 353 return (0xFE00 <= c && c <= 0xFE0F) || (0xE0100 <= c && c <= 0xE01EF); 354} 355 356bool FontCollection::hasVariationSelector(uint32_t baseCodepoint, 357 uint32_t variationSelector) const { 358 if (!isVariationSelector(variationSelector)) { 359 return false; 360 } 361 if (baseCodepoint >= mMaxChar) { 362 return false; 363 } 364 365 AutoMutex _l(gMinikinLock); 366 367 // Currently mRanges can not be used here since it isn't aware of the variation sequence. 368 for (size_t i = 0; i < mVSFamilyVec.size(); i++) { 369 if (mVSFamilyVec[i]->hasGlyph(baseCodepoint, variationSelector)) { 370 return true; 371 } 372 } 373 374 // Even if there is no cmap format 14 subtable entry for the given sequence, should return true 375 // for emoji + U+FE0E case since we have special fallback rule for the sequence. 376 if (isEmojiStyleVSBase(baseCodepoint) && variationSelector == TEXT_STYLE_VS) { 377 for (size_t i = 0; i < mFamilies.size(); ++i) { 378 if (!mFamilies[i]->isColorEmojiFamily() && variationSelector == TEXT_STYLE_VS && 379 mFamilies[i]->hasGlyph(baseCodepoint, 0)) { 380 return true; 381 } 382 } 383 } 384 385 return false; 386} 387 388void FontCollection::itemize(const uint16_t *string, size_t string_size, FontStyle style, 389 vector<Run>* result) const { 390 const uint32_t langListId = style.getLanguageListId(); 391 int variant = style.getVariant(); 392 FontFamily* lastFamily = NULL; 393 Run* run = NULL; 394 395 if (string_size == 0) { 396 return; 397 } 398 399 const uint32_t kEndOfString = 0xFFFFFFFF; 400 401 uint32_t nextCh = 0; 402 uint32_t prevCh = 0; 403 size_t nextUtf16Pos = 0; 404 size_t readLength = 0; 405 U16_NEXT(string, readLength, string_size, nextCh); 406 407 do { 408 const uint32_t ch = nextCh; 409 const size_t utf16Pos = nextUtf16Pos; 410 nextUtf16Pos = readLength; 411 if (readLength < string_size) { 412 U16_NEXT(string, readLength, string_size, nextCh); 413 } else { 414 nextCh = kEndOfString; 415 } 416 417 bool shouldContinueRun = false; 418 if (lastFamily != nullptr) { 419 if (isStickyWhitelisted(ch)) { 420 // Continue using existing font as long as it has coverage and is whitelisted 421 shouldContinueRun = lastFamily->getCoverage()->get(ch); 422 } else if (isVariationSelector(ch)) { 423 // Always continue if the character is a variation selector. 424 shouldContinueRun = true; 425 } 426 } 427 428 if (!shouldContinueRun) { 429 FontFamily* family = getFamilyForChar(ch, isVariationSelector(nextCh) ? nextCh : 0, 430 langListId, variant); 431 if (utf16Pos == 0 || family != lastFamily) { 432 size_t start = utf16Pos; 433 // Workaround for combining marks and emoji modifiers until we implement 434 // per-cluster font selection: if a combining mark or an emoji modifier is found in 435 // a different font that also supports the previous character, attach previous 436 // character to the new run. U+20E3 COMBINING ENCLOSING KEYCAP, used in emoji, is 437 // handled properly by this since it's a combining mark too. 438 if (utf16Pos != 0 && 439 ((U_GET_GC_MASK(ch) & U_GC_M_MASK) != 0 || 440 (isEmojiModifier(ch) && isEmojiBase(prevCh))) && 441 family && family->getCoverage()->get(prevCh)) { 442 const size_t prevChLength = U16_LENGTH(prevCh); 443 run->end -= prevChLength; 444 if (run->start == run->end) { 445 result->pop_back(); 446 } 447 start -= prevChLength; 448 } 449 Run dummy; 450 result->push_back(dummy); 451 run = &result->back(); 452 run->fakedFont = family->getClosestMatch(style); 453 lastFamily = family; 454 run->start = start; 455 } 456 } 457 prevCh = ch; 458 run->end = nextUtf16Pos; // exclusive 459 } while (nextCh != kEndOfString); 460} 461 462MinikinFont* FontCollection::baseFont(FontStyle style) { 463 return baseFontFaked(style).font; 464} 465 466FakedFont FontCollection::baseFontFaked(FontStyle style) { 467 return mFamilies[0]->getClosestMatch(style); 468} 469 470uint32_t FontCollection::getId() const { 471 return mId; 472} 473 474} // namespace android 475