1/* 2 * Copyright (C) 2013 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17// #define VERBOSE_DEBUG 18 19#define LOG_TAG "Minikin" 20 21#include <algorithm> 22 23#include <log/log.h> 24#include "unicode/unistr.h" 25#include "unicode/unorm2.h" 26 27#include "FontLanguage.h" 28#include "FontLanguageListCache.h" 29#include "MinikinInternal.h" 30#include <minikin/Emoji.h> 31#include <minikin/FontCollection.h> 32 33using std::vector; 34 35namespace minikin { 36 37template <typename T> 38static inline T max(T a, T b) { 39 return a>b ? a : b; 40} 41 42const uint32_t EMOJI_STYLE_VS = 0xFE0F; 43const uint32_t TEXT_STYLE_VS = 0xFE0E; 44 45uint32_t FontCollection::sNextId = 0; 46 47FontCollection::FontCollection(std::shared_ptr<FontFamily>&& typeface) : mMaxChar(0) { 48 std::vector<std::shared_ptr<FontFamily>> typefaces; 49 typefaces.push_back(typeface); 50 init(typefaces); 51} 52 53FontCollection::FontCollection(const vector<std::shared_ptr<FontFamily>>& typefaces) : 54 mMaxChar(0) { 55 init(typefaces); 56} 57 58void FontCollection::init(const vector<std::shared_ptr<FontFamily>>& typefaces) { 59 android::AutoMutex _l(gMinikinLock); 60 mId = sNextId++; 61 vector<uint32_t> lastChar; 62 size_t nTypefaces = typefaces.size(); 63#ifdef VERBOSE_DEBUG 64 ALOGD("nTypefaces = %zd\n", nTypefaces); 65#endif 66 const FontStyle defaultStyle; 67 for (size_t i = 0; i < nTypefaces; i++) { 68 const std::shared_ptr<FontFamily>& family = typefaces[i]; 69 if (family->getClosestMatch(defaultStyle).font == nullptr) { 70 continue; 71 } 72 const SparseBitSet& coverage = family->getCoverage(); 73 mFamilies.push_back(family); // emplace_back would be better 74 if (family->hasVSTable()) { 75 mVSFamilyVec.push_back(family); 76 } 77 mMaxChar = max(mMaxChar, coverage.length()); 78 lastChar.push_back(coverage.nextSetBit(0)); 79 80 const std::unordered_set<AxisTag>& supportedAxes = family->supportedAxes(); 81 mSupportedAxes.insert(supportedAxes.begin(), supportedAxes.end()); 82 } 83 nTypefaces = mFamilies.size(); 84 LOG_ALWAYS_FATAL_IF(nTypefaces == 0, 85 "Font collection must have at least one valid typeface"); 86 LOG_ALWAYS_FATAL_IF(nTypefaces > 254, 87 "Font collection may only have up to 254 font families."); 88 size_t nPages = (mMaxChar + kPageMask) >> kLogCharsPerPage; 89 // TODO: Use variation selector map for mRanges construction. 90 // A font can have a glyph for a base code point and variation selector pair but no glyph for 91 // the base code point without variation selector. The family won't be listed in the range in 92 // this case. 93 for (size_t i = 0; i < nPages; i++) { 94 Range dummy; 95 mRanges.push_back(dummy); 96 Range* range = &mRanges.back(); 97#ifdef VERBOSE_DEBUG 98 ALOGD("i=%zd: range start = %zd\n", i, offset); 99#endif 100 range->start = mFamilyVec.size(); 101 for (size_t j = 0; j < nTypefaces; j++) { 102 if (lastChar[j] < (i + 1) << kLogCharsPerPage) { 103 const std::shared_ptr<FontFamily>& family = mFamilies[j]; 104 mFamilyVec.push_back(static_cast<uint8_t>(j)); 105 uint32_t nextChar = family->getCoverage().nextSetBit((i + 1) << kLogCharsPerPage); 106#ifdef VERBOSE_DEBUG 107 ALOGD("nextChar = %d (j = %zd)\n", nextChar, j); 108#endif 109 lastChar[j] = nextChar; 110 } 111 } 112 range->end = mFamilyVec.size(); 113 } 114 // See the comment in Range for more details. 115 LOG_ALWAYS_FATAL_IF(mFamilyVec.size() >= 0xFFFF, 116 "Exceeded the maximum indexable cmap coverage."); 117} 118 119// Special scores for the font fallback. 120const uint32_t kUnsupportedFontScore = 0; 121const uint32_t kFirstFontScore = UINT32_MAX; 122 123// Calculates a font score. 124// The score of the font family is based on three subscores. 125// - Coverage Score: How well the font family covers the given character or variation sequence. 126// - Language Score: How well the font family is appropriate for the language. 127// - Variant Score: Whether the font family matches the variant. Note that this variant is not the 128// one in BCP47. This is our own font variant (e.g., elegant, compact). 129// 130// Then, there is a priority for these three subscores as follow: 131// Coverage Score > Language Score > Variant Score 132// The returned score reflects this priority order. 133// 134// Note that there are two special scores. 135// - kUnsupportedFontScore: When the font family doesn't support the variation sequence or even its 136// base character. 137// - kFirstFontScore: When the font is the first font family in the collection and it supports the 138// given character or variation sequence. 139uint32_t FontCollection::calcFamilyScore(uint32_t ch, uint32_t vs, int variant, uint32_t langListId, 140 const std::shared_ptr<FontFamily>& fontFamily) const { 141 142 const uint32_t coverageScore = calcCoverageScore(ch, vs, fontFamily); 143 if (coverageScore == kFirstFontScore || coverageScore == kUnsupportedFontScore) { 144 // No need to calculate other scores. 145 return coverageScore; 146 } 147 148 const uint32_t languageScore = calcLanguageMatchingScore(langListId, *fontFamily); 149 const uint32_t variantScore = calcVariantMatchingScore(variant, *fontFamily); 150 151 // Subscores are encoded into 31 bits representation to meet the subscore priority. 152 // The highest 2 bits are for coverage score, then following 28 bits are for language score, 153 // then the last 1 bit is for variant score. 154 return coverageScore << 29 | languageScore << 1 | variantScore; 155} 156 157// Calculates a font score based on variation sequence coverage. 158// - Returns kUnsupportedFontScore if the font doesn't support the variation sequence or its base 159// character. 160// - Returns kFirstFontScore if the font family is the first font family in the collection and it 161// supports the given character or variation sequence. 162// - Returns 3 if the font family supports the variation sequence. 163// - Returns 2 if the vs is a color variation selector (U+FE0F) and if the font is an emoji font. 164// - Returns 2 if the vs is a text variation selector (U+FE0E) and if the font is not an emoji font. 165// - Returns 1 if the variation selector is not specified or if the font family only supports the 166// variation sequence's base character. 167uint32_t FontCollection::calcCoverageScore(uint32_t ch, uint32_t vs, 168 const std::shared_ptr<FontFamily>& fontFamily) const { 169 const bool hasVSGlyph = (vs != 0) && fontFamily->hasGlyph(ch, vs); 170 if (!hasVSGlyph && !fontFamily->getCoverage().get(ch)) { 171 // The font doesn't support either variation sequence or even the base character. 172 return kUnsupportedFontScore; 173 } 174 175 if ((vs == 0 || hasVSGlyph) && mFamilies[0] == fontFamily) { 176 // If the first font family supports the given character or variation sequence, always use 177 // it. 178 return kFirstFontScore; 179 } 180 181 if (vs == 0) { 182 return 1; 183 } 184 185 if (hasVSGlyph) { 186 return 3; 187 } 188 189 if (vs == EMOJI_STYLE_VS || vs == TEXT_STYLE_VS) { 190 const FontLanguages& langs = FontLanguageListCache::getById(fontFamily->langId()); 191 bool hasEmojiFlag = false; 192 for (size_t i = 0; i < langs.size(); ++i) { 193 if (langs[i].getEmojiStyle() == FontLanguage::EMSTYLE_EMOJI) { 194 hasEmojiFlag = true; 195 break; 196 } 197 } 198 199 if (vs == EMOJI_STYLE_VS) { 200 return hasEmojiFlag ? 2 : 1; 201 } else { // vs == TEXT_STYLE_VS 202 return hasEmojiFlag ? 1 : 2; 203 } 204 } 205 return 1; 206} 207 208// Calculate font scores based on the script matching, subtag matching and primary langauge matching. 209// 210// 1. If only the font's language matches or there is no matches between requested font and 211// supported font, then the font obtains a score of 0. 212// 2. Without a match in language, considering subtag may change font's EmojiStyle over script, 213// a match in subtag gets a score of 2 and a match in scripts gains a score of 1. 214// 3. Regarding to two elements matchings, language-and-subtag matching has a score of 4, while 215// language-and-script obtains a socre of 3 with the same reason above. 216// 217// If two languages in the requested list have the same language score, the font matching with 218// higher priority language gets a higher score. For example, in the case the user requested 219// language list is "ja-Jpan,en-Latn". The score of for the font of "ja-Jpan" gets a higher score 220// than the font of "en-Latn". 221// 222// To achieve score calculation with priorities, the language score is determined as follows: 223// LanguageScore = s(0) * 5^(m - 1) + s(1) * 5^(m - 2) + ... + s(m - 2) * 5 + s(m - 1) 224// Here, m is the maximum number of languages to be compared, and s(i) is the i-th language's 225// matching score. The possible values of s(i) are 0, 1, 2, 3 and 4. 226uint32_t FontCollection::calcLanguageMatchingScore( 227 uint32_t userLangListId, const FontFamily& fontFamily) { 228 const FontLanguages& langList = FontLanguageListCache::getById(userLangListId); 229 const FontLanguages& fontLanguages = FontLanguageListCache::getById(fontFamily.langId()); 230 231 const size_t maxCompareNum = std::min(langList.size(), FONT_LANGUAGES_LIMIT); 232 uint32_t score = 0; 233 for (size_t i = 0; i < maxCompareNum; ++i) { 234 score = score * 5u + langList[i].calcScoreFor(fontLanguages); 235 } 236 return score; 237} 238 239// Calculates a font score based on variant ("compact" or "elegant") matching. 240// - Returns 1 if the font doesn't have variant or the variant matches with the text style. 241// - No score if the font has a variant but it doesn't match with the text style. 242uint32_t FontCollection::calcVariantMatchingScore(int variant, const FontFamily& fontFamily) { 243 return (fontFamily.variant() == 0 || fontFamily.variant() == variant) ? 1 : 0; 244} 245 246// Implement heuristic for choosing best-match font. Here are the rules: 247// 1. If first font in the collection has the character, it wins. 248// 2. Calculate a score for the font family. See comments in calcFamilyScore for the detail. 249// 3. Highest score wins, with ties resolved to the first font. 250// This method never returns nullptr. 251const std::shared_ptr<FontFamily>& FontCollection::getFamilyForChar(uint32_t ch, uint32_t vs, 252 uint32_t langListId, int variant) const { 253 if (ch >= mMaxChar) { 254 return mFamilies[0]; 255 } 256 257 Range range = mRanges[ch >> kLogCharsPerPage]; 258 259 if (vs != 0) { 260 range = { 0, static_cast<uint16_t>(mFamilies.size()) }; 261 } 262 263#ifdef VERBOSE_DEBUG 264 ALOGD("querying range %zd:%zd\n", range.start, range.end); 265#endif 266 int bestFamilyIndex = -1; 267 uint32_t bestScore = kUnsupportedFontScore; 268 for (size_t i = range.start; i < range.end; i++) { 269 const std::shared_ptr<FontFamily>& family = 270 vs == 0 ? mFamilies[mFamilyVec[i]] : mFamilies[i]; 271 const uint32_t score = calcFamilyScore(ch, vs, variant, langListId, family); 272 if (score == kFirstFontScore) { 273 // If the first font family supports the given character or variation sequence, always 274 // use it. 275 return family; 276 } 277 if (score > bestScore) { 278 bestScore = score; 279 bestFamilyIndex = i; 280 } 281 } 282 if (bestFamilyIndex == -1) { 283 UErrorCode errorCode = U_ZERO_ERROR; 284 const UNormalizer2* normalizer = unorm2_getNFDInstance(&errorCode); 285 if (U_SUCCESS(errorCode)) { 286 UChar decomposed[4]; 287 int len = unorm2_getRawDecomposition(normalizer, ch, decomposed, 4, &errorCode); 288 if (U_SUCCESS(errorCode) && len > 0) { 289 int off = 0; 290 U16_NEXT_UNSAFE(decomposed, off, ch); 291 return getFamilyForChar(ch, vs, langListId, variant); 292 } 293 } 294 return mFamilies[0]; 295 } 296 return vs == 0 ? mFamilies[mFamilyVec[bestFamilyIndex]] : mFamilies[bestFamilyIndex]; 297} 298 299const uint32_t NBSP = 0x00A0; 300const uint32_t SOFT_HYPHEN = 0x00AD; 301const uint32_t ZWJ = 0x200C; 302const uint32_t ZWNJ = 0x200D; 303const uint32_t HYPHEN = 0x2010; 304const uint32_t NB_HYPHEN = 0x2011; 305const uint32_t NNBSP = 0x202F; 306const uint32_t FEMALE_SIGN = 0x2640; 307const uint32_t MALE_SIGN = 0x2642; 308const uint32_t STAFF_OF_AESCULAPIUS = 0x2695; 309 310// Characters where we want to continue using existing font run instead of 311// recomputing the best match in the fallback list. 312static const uint32_t stickyWhitelist[] = { 313 '!', ',', '-', '.', ':', ';', '?', NBSP, ZWJ, ZWNJ, 314 HYPHEN, NB_HYPHEN, NNBSP, FEMALE_SIGN, MALE_SIGN, STAFF_OF_AESCULAPIUS }; 315 316static bool isStickyWhitelisted(uint32_t c) { 317 for (size_t i = 0; i < sizeof(stickyWhitelist) / sizeof(stickyWhitelist[0]); i++) { 318 if (stickyWhitelist[i] == c) return true; 319 } 320 return false; 321} 322 323bool FontCollection::hasVariationSelector(uint32_t baseCodepoint, 324 uint32_t variationSelector) const { 325 if (!isVariationSelector(variationSelector)) { 326 return false; 327 } 328 if (baseCodepoint >= mMaxChar) { 329 return false; 330 } 331 332 // Currently mRanges can not be used here since it isn't aware of the variation sequence. 333 for (size_t i = 0; i < mVSFamilyVec.size(); i++) { 334 if (mVSFamilyVec[i]->hasGlyph(baseCodepoint, variationSelector)) { 335 return true; 336 } 337 } 338 339 // TODO: We can remove this lock by precomputing color emoji information. 340 android::AutoMutex _l(gMinikinLock); 341 342 // Even if there is no cmap format 14 subtable entry for the given sequence, should return true 343 // for <char, text presentation selector> case since we have special fallback rule for the 344 // sequence. Note that we don't need to restrict this to already standardized variation 345 // sequences, since Unicode is adding variation sequences more frequently now and may even move 346 // towards allowing text and emoji variation selectors on any character. 347 if (variationSelector == TEXT_STYLE_VS) { 348 for (size_t i = 0; i < mFamilies.size(); ++i) { 349 if (!mFamilies[i]->isColorEmojiFamily() && mFamilies[i]->hasGlyph(baseCodepoint, 0)) { 350 return true; 351 } 352 } 353 } 354 355 return false; 356} 357 358void FontCollection::itemize(const uint16_t *string, size_t string_size, FontStyle style, 359 vector<Run>* result) const { 360 const uint32_t langListId = style.getLanguageListId(); 361 int variant = style.getVariant(); 362 const FontFamily* lastFamily = nullptr; 363 Run* run = NULL; 364 365 if (string_size == 0) { 366 return; 367 } 368 369 const uint32_t kEndOfString = 0xFFFFFFFF; 370 371 uint32_t nextCh = 0; 372 uint32_t prevCh = 0; 373 size_t nextUtf16Pos = 0; 374 size_t readLength = 0; 375 U16_NEXT(string, readLength, string_size, nextCh); 376 377 do { 378 const uint32_t ch = nextCh; 379 const size_t utf16Pos = nextUtf16Pos; 380 nextUtf16Pos = readLength; 381 if (readLength < string_size) { 382 U16_NEXT(string, readLength, string_size, nextCh); 383 } else { 384 nextCh = kEndOfString; 385 } 386 387 bool shouldContinueRun = false; 388 if (lastFamily != nullptr) { 389 if (isStickyWhitelisted(ch)) { 390 // Continue using existing font as long as it has coverage and is whitelisted 391 shouldContinueRun = lastFamily->getCoverage().get(ch); 392 } else if (ch == SOFT_HYPHEN || isVariationSelector(ch)) { 393 // Always continue if the character is the soft hyphen or a variation selector. 394 shouldContinueRun = true; 395 } 396 } 397 398 if (!shouldContinueRun) { 399 const std::shared_ptr<FontFamily>& family = getFamilyForChar( 400 ch, isVariationSelector(nextCh) ? nextCh : 0, langListId, variant); 401 if (utf16Pos == 0 || family.get() != lastFamily) { 402 size_t start = utf16Pos; 403 // Workaround for combining marks and emoji modifiers until we implement 404 // per-cluster font selection: if a combining mark or an emoji modifier is found in 405 // a different font that also supports the previous character, attach previous 406 // character to the new run. U+20E3 COMBINING ENCLOSING KEYCAP, used in emoji, is 407 // handled properly by this since it's a combining mark too. 408 if (utf16Pos != 0 && 409 ((U_GET_GC_MASK(ch) & U_GC_M_MASK) != 0 || 410 (isEmojiModifier(ch) && isEmojiBase(prevCh))) && 411 family != nullptr && family->getCoverage().get(prevCh)) { 412 const size_t prevChLength = U16_LENGTH(prevCh); 413 run->end -= prevChLength; 414 if (run->start == run->end) { 415 result->pop_back(); 416 } 417 start -= prevChLength; 418 } 419 result->push_back({family->getClosestMatch(style), static_cast<int>(start), 0}); 420 run = &result->back(); 421 lastFamily = family.get(); 422 } 423 } 424 prevCh = ch; 425 run->end = nextUtf16Pos; // exclusive 426 } while (nextCh != kEndOfString); 427} 428 429FakedFont FontCollection::baseFontFaked(FontStyle style) { 430 return mFamilies[0]->getClosestMatch(style); 431} 432 433std::shared_ptr<FontCollection> FontCollection::createCollectionWithVariation( 434 const std::vector<FontVariation>& variations) { 435 if (variations.empty() || mSupportedAxes.empty()) { 436 return nullptr; 437 } 438 439 bool hasSupportedAxis = false; 440 for (const FontVariation& variation : variations) { 441 if (mSupportedAxes.find(variation.axisTag) != mSupportedAxes.end()) { 442 hasSupportedAxis = true; 443 break; 444 } 445 } 446 if (!hasSupportedAxis) { 447 // None of variation axes are supported by this font collection. 448 return nullptr; 449 } 450 451 std::vector<std::shared_ptr<FontFamily> > families; 452 for (const std::shared_ptr<FontFamily>& family : mFamilies) { 453 std::shared_ptr<FontFamily> newFamily = family->createFamilyWithVariation(variations); 454 if (newFamily) { 455 families.push_back(newFamily); 456 } else { 457 families.push_back(family); 458 } 459 } 460 461 return std::shared_ptr<FontCollection>(new FontCollection(families)); 462} 463 464uint32_t FontCollection::getId() const { 465 return mId; 466} 467 468} // namespace minikin 469