FontCollection.cpp revision 6f9966ea7c1910fd780cf7779cc59701c9b98a2b
1/*
2 * Copyright (C) 2013 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17// #define VERBOSE_DEBUG
18
19#define LOG_TAG "Minikin"
20#include <cutils/log.h>
21#include <algorithm>
22
23#include "unicode/unistr.h"
24#include "unicode/unorm2.h"
25
26#include "FontLanguage.h"
27#include "FontLanguageListCache.h"
28#include "MinikinInternal.h"
29#include <minikin/FontCollection.h>
30
31using std::vector;
32
33namespace android {
34
// Returns the larger of the two arguments; when neither compares greater
// than the other, the second argument is returned.
template <typename T>
static inline T max(T a, T b) {
    if (a > b) {
        return a;
    }
    return b;
}
39
// Source of per-collection ids: the constructor copies the current value into
// mId and then increments it.
uint32_t FontCollection::sNextId = 0;
41
42FontCollection::FontCollection(const vector<FontFamily*>& typefaces) :
43    mMaxChar(0) {
44    AutoMutex _l(gMinikinLock);
45    mId = sNextId++;
46    vector<uint32_t> lastChar;
47    size_t nTypefaces = typefaces.size();
48#ifdef VERBOSE_DEBUG
49    ALOGD("nTypefaces = %zd\n", nTypefaces);
50#endif
51    const FontStyle defaultStyle;
52    for (size_t i = 0; i < nTypefaces; i++) {
53        FontFamily* family = typefaces[i];
54        MinikinFont* typeface = family->getClosestMatch(defaultStyle).font;
55        if (typeface == NULL) {
56            continue;
57        }
58        family->RefLocked();
59        const SparseBitSet* coverage = family->getCoverage();
60        if (coverage == nullptr) {
61            family->UnrefLocked();
62            continue;
63        }
64        mFamilies.push_back(family);  // emplace_back would be better
65        mMaxChar = max(mMaxChar, coverage->length());
66        lastChar.push_back(coverage->nextSetBit(0));
67    }
68    nTypefaces = mFamilies.size();
69    LOG_ALWAYS_FATAL_IF(nTypefaces == 0,
70        "Font collection must have at least one valid typeface");
71    size_t nPages = (mMaxChar + kPageMask) >> kLogCharsPerPage;
72    size_t offset = 0;
73    // TODO: Use variation selector map for mRanges construction.
74    // A font can have a glyph for a base code point and variation selector pair but no glyph for
75    // the base code point without variation selector. The family won't be listed in the range in
76    // this case.
77    for (size_t i = 0; i < nPages; i++) {
78        Range dummy;
79        mRanges.push_back(dummy);
80        Range* range = &mRanges.back();
81#ifdef VERBOSE_DEBUG
82        ALOGD("i=%zd: range start = %zd\n", i, offset);
83#endif
84        range->start = offset;
85        for (size_t j = 0; j < nTypefaces; j++) {
86            if (lastChar[j] < (i + 1) << kLogCharsPerPage) {
87                FontFamily* family = mFamilies[j];
88                mFamilyVec.push_back(family);
89                offset++;
90                uint32_t nextChar = family->getCoverage()->nextSetBit((i + 1) << kLogCharsPerPage);
91#ifdef VERBOSE_DEBUG
92                ALOGD("nextChar = %d (j = %zd)\n", nextChar, j);
93#endif
94                lastChar[j] = nextChar;
95            }
96        }
97        range->end = offset;
98    }
99}
100
101FontCollection::~FontCollection() {
102    for (size_t i = 0; i < mFamilies.size(); i++) {
103        mFamilies[i]->UnrefLocked();
104    }
105}
106
// Special scores for the font fallback.
// kUnsupportedFontScore is the lowest possible score and kFirstFontScore is
// the highest, so neither can collide with a regular packed score.
constexpr uint32_t kUnsupportedFontScore = 0u;
constexpr uint32_t kFirstFontScore = UINT32_MAX;
110
111// Calculates a font score.
112// The score of the font family is based on three subscores.
113//  - Coverage Score: How well the font family covers the given character or variation sequence.
114//  - Language Score: How well the font family is appropriate for the language.
115//  - Variant Score: Whether the font family matches the variant. Note that this variant is not the
116//    one in BCP47. This is our own font variant (e.g., elegant, compact).
117//
118// Then, there is a priority for these three subscores as follow:
119//   Coverage Score > Language Score > Variant Score
120// The returned score reflects this priority order.
121//
122// Note that there are two special scores.
123//  - kUnsupportedFontScore: When the font family doesn't support the variation sequence or even its
124//    base character.
125//  - kFirstFontScore: When the font is the first font family in the collection and it supports the
126//    given character or variation sequence.
127uint32_t FontCollection::calcFamilyScore(uint32_t ch, uint32_t vs, int variant, uint32_t langListId,
128                                        FontFamily* fontFamily) const {
129
130    const uint32_t coverageScore = calcCoverageScore(ch, vs, fontFamily);
131    if (coverageScore == kFirstFontScore || coverageScore == kUnsupportedFontScore) {
132        // No need to calculate other scores.
133        return coverageScore;
134    }
135
136    const uint32_t languageScore = calcLanguageMatchingScore(langListId, *fontFamily);
137    const uint32_t variantScore = calcVariantMatchingScore(variant, *fontFamily);
138
139    // Subscores are encoded into 31 bits representation to meet the subscore priority.
140    // The highest 2 bits are for coverage score, then following 28 bits are for language score,
141    // then the last 1 bit is for variant score.
142    return coverageScore << 29 | languageScore << 1 | variantScore;
143}
144
145// Calculates a font score based on variation sequence coverage.
146// - Returns kUnsupportedFontScore if the font doesn't support the variation sequence or its base
147//   character.
148// - Returns kFirstFontScore if the font family is the first font family in the collection and it
149//   supports the given character or variation sequence.
150// - Returns 3 if the font family supports the variation sequence.
151// - Returns 2 if the vs is a color variation selector (U+FE0F) and if the font is an emoji font.
152// - Returns 2 if the vs is a text variation selector (U+FE0E) and if the font is not an emoji font.
153// - Returns 1 if the variation selector is not specified or if the font family only supports the
154//   variation sequence's base character.
155uint32_t FontCollection::calcCoverageScore(uint32_t ch, uint32_t vs, FontFamily* fontFamily) const {
156    const bool hasVSGlyph = (vs != 0) && fontFamily->hasVariationSelector(ch, vs);
157    if (!hasVSGlyph && !fontFamily->getCoverage()->get(ch)) {
158        // The font doesn't support either variation sequence or even the base character.
159        return kUnsupportedFontScore;
160    }
161
162    if ((vs == 0 || hasVSGlyph) && mFamilies[0] == fontFamily) {
163        // If the first font family supports the given character or variation sequence, always use
164        // it.
165        return kFirstFontScore;
166    }
167
168    if (vs == 0) {
169        return 1;
170    }
171
172    if (hasVSGlyph) {
173        return 3;
174    }
175
176    if (vs == 0xFE0F || vs == 0xFE0E) {
177        // TODO use all language in the list.
178        const FontLanguage lang = FontLanguageListCache::getById(fontFamily->langId())[0];
179        const bool hasEmojiFlag = lang.hasEmojiFlag();
180        if (vs == 0xFE0F) {
181            return hasEmojiFlag ? 2 : 1;
182        } else {  // vs == 0xFE0E
183            return hasEmojiFlag ? 1 : 2;
184        }
185    }
186    return 1;
187}
188
189// Calculates font scores based on the script matching and primary langauge matching.
190//
191// If the font's script doesn't support the requested script, the font gets a score of 0. If the
192// font's script supports the requested script and the font has the same primary language as the
193// requested one, the font gets a score of 2. If the font's script supports the requested script
194// but the primary language is different from the requested one, the font gets a score of 1.
195//
196// If two languages in the requested list have the same language score, the font matching with
197// higher priority language gets a higher score. For example, in the case the user requested
198// language list is "ja-Jpan,en-Latn". The score of for the font of "ja-Jpan" gets a higher score
199// than the font of "en-Latn".
200//
201// To achieve the above two conditions, the language score is determined as follows:
202//   LanguageScore = s(0) * 3^(m - 1) + s(1) * 3^(m - 2) + ... + s(m - 2) * 3 + s(m - 1)
203// Here, m is the maximum number of languages to be compared, and s(i) is the i-th language's
204// matching score. The possible values of s(i) are 0, 1 and 2.
205uint32_t FontCollection::calcLanguageMatchingScore(
206        uint32_t userLangListId, const FontFamily& fontFamily) {
207    const FontLanguages& langList = FontLanguageListCache::getById(userLangListId);
208    // TODO use all language in the list.
209    FontLanguage fontLanguage = FontLanguageListCache::getById(fontFamily.langId())[0];
210
211    const size_t maxCompareNum = std::min(langList.size(), FONT_LANGUAGES_LIMIT);
212    uint32_t score = fontLanguage.getScoreFor(langList[0]);  // maxCompareNum can't be zero.
213    for (size_t i = 1; i < maxCompareNum; ++i) {
214        score = score * 3u + fontLanguage.getScoreFor(langList[i]);
215    }
216    return score;
217}
218
219// Calculates a font score based on variant ("compact" or "elegant") matching.
220//  - Returns 1 if the font doesn't have variant or the variant matches with the text style.
221//  - No score if the font has a variant but it doesn't match with the text style.
222uint32_t FontCollection::calcVariantMatchingScore(int variant, const FontFamily& fontFamily) {
223    return (fontFamily.variant() == 0 || fontFamily.variant() == variant) ? 1 : 0;
224}
225
226// Implement heuristic for choosing best-match font. Here are the rules:
227// 1. If first font in the collection has the character, it wins.
228// 2. Calculate a score for the font family. See comments in calcFamilyScore for the detail.
229// 3. Highest score wins, with ties resolved to the first font.
230FontFamily* FontCollection::getFamilyForChar(uint32_t ch, uint32_t vs,
231            uint32_t langListId, int variant) const {
232    if (ch >= mMaxChar) {
233        return NULL;
234    }
235
236    // Even if the font supports variation sequence, mRanges isn't aware of the base character of
237    // the sequence. Search all FontFamilies if variation sequence is specified.
238    // TODO: Always use mRanges for font search.
239    const std::vector<FontFamily*>& familyVec = (vs == 0) ? mFamilyVec : mFamilies;
240    Range range;
241    if (vs == 0) {
242        range = mRanges[ch >> kLogCharsPerPage];
243    } else {
244        range = { 0, mFamilies.size() };
245    }
246
247#ifdef VERBOSE_DEBUG
248    ALOGD("querying range %zd:%zd\n", range.start, range.end);
249#endif
250    FontFamily* bestFamily = nullptr;
251    uint32_t bestScore = kUnsupportedFontScore;
252    for (size_t i = range.start; i < range.end; i++) {
253        FontFamily* family = familyVec[i];
254        const uint32_t score = calcFamilyScore(ch, vs, variant, langListId, family);
255        if (score == kFirstFontScore) {
256            // If the first font family supports the given character or variation sequence, always
257            // use it.
258            return family;
259        }
260        if (score > bestScore) {
261            bestScore = score;
262            bestFamily = family;
263        }
264    }
265    if (bestFamily == nullptr && !mFamilyVec.empty()) {
266        UErrorCode errorCode = U_ZERO_ERROR;
267        const UNormalizer2* normalizer = unorm2_getNFDInstance(&errorCode);
268        if (U_SUCCESS(errorCode)) {
269            UChar decomposed[4];
270            int len = unorm2_getRawDecomposition(normalizer, ch, decomposed, 4, &errorCode);
271            if (U_SUCCESS(errorCode) && len > 0) {
272                int off = 0;
273                U16_NEXT_UNSAFE(decomposed, off, ch);
274                return getFamilyForChar(ch, vs, langListId, variant);
275            }
276        }
277        bestFamily = mFamilies[0];
278    }
279    return bestFamily;
280}
281
// Code points that receive special treatment during itemization.
const uint32_t NBSP = 0xa0;         // U+00A0 NO-BREAK SPACE
const uint32_t ZWNJ = 0x200c;       // U+200C ZERO WIDTH NON-JOINER
const uint32_t ZWJ = 0x200d;        // U+200D ZERO WIDTH JOINER
const uint32_t KEYCAP = 0x20e3;     // U+20E3 COMBINING ENCLOSING KEYCAP
const uint32_t HYPHEN = 0x2010;     // U+2010 HYPHEN
const uint32_t NB_HYPHEN = 0x2011;  // U+2011 NON-BREAKING HYPHEN

// Characters where we want to continue using existing font run instead of
// recomputing the best match in the fallback list.
// Both joiners are listed, so correcting the previously swapped ZWJ/ZWNJ names
// leaves the contents of this table unchanged.
static const uint32_t stickyWhitelist[] = { '!', ',', '-', '.', ':', ';', '?', NBSP, ZWNJ, ZWJ,
        KEYCAP, HYPHEN, NB_HYPHEN };
293
294static bool isStickyWhitelisted(uint32_t c) {
295    for (size_t i = 0; i < sizeof(stickyWhitelist) / sizeof(stickyWhitelist[0]); i++) {
296        if (stickyWhitelist[i] == c) return true;
297    }
298    return false;
299}
300
// Returns true when c lies in U+FE00..U+FE0F or in U+E0100..U+E01EF (both
// ranges inclusive).
static bool isVariationSelector(uint32_t c) {
    const bool inFirstRange = (c >= 0xFE00) && (c <= 0xFE0F);
    const bool inSecondRange = (c >= 0xE0100) && (c <= 0xE01EF);
    return inFirstRange || inSecondRange;
}
304
305bool FontCollection::hasVariationSelector(uint32_t baseCodepoint,
306        uint32_t variationSelector) const {
307    if (!isVariationSelector(variationSelector)) {
308        return false;
309    }
310    if (baseCodepoint >= mMaxChar) {
311        return false;
312    }
313    // Currently mRanges can not be used here since it isn't aware of the variation sequence.
314    // TODO: Use mRanges for narrowing down the search range.
315    for (size_t i = 0; i < mFamilies.size(); i++) {
316        AutoMutex _l(gMinikinLock);
317        if (mFamilies[i]->hasVariationSelector(baseCodepoint, variationSelector)) {
318          return true;
319        }
320    }
321    return false;
322}
323
324void FontCollection::itemize(const uint16_t *string, size_t string_size, FontStyle style,
325        vector<Run>* result) const {
326    const uint32_t langListId = style.getLanguageListId();
327    int variant = style.getVariant();
328    FontFamily* lastFamily = NULL;
329    Run* run = NULL;
330
331    if (string_size == 0) {
332        return;
333    }
334
335    const uint32_t kEndOfString = 0xFFFFFFFF;
336
337    uint32_t nextCh = 0;
338    uint32_t prevCh = 0;
339    size_t nextUtf16Pos = 0;
340    size_t readLength = 0;
341    U16_NEXT(string, readLength, string_size, nextCh);
342
343    do {
344        const uint32_t ch = nextCh;
345        const size_t utf16Pos = nextUtf16Pos;
346        nextUtf16Pos = readLength;
347        if (readLength < string_size) {
348            U16_NEXT(string, readLength, string_size, nextCh);
349        } else {
350            nextCh = kEndOfString;
351        }
352
353        bool shouldContinueRun = false;
354        if (lastFamily != nullptr) {
355            if (isStickyWhitelisted(ch)) {
356                // Continue using existing font as long as it has coverage and is whitelisted
357                shouldContinueRun = lastFamily->getCoverage()->get(ch);
358            } else if (isVariationSelector(ch)) {
359                // Always continue if the character is a variation selector.
360                shouldContinueRun = true;
361            }
362        }
363
364        if (!shouldContinueRun) {
365            FontFamily* family = getFamilyForChar(ch, isVariationSelector(nextCh) ? nextCh : 0,
366                    langListId, variant);
367            if (utf16Pos == 0 || family != lastFamily) {
368                size_t start = utf16Pos;
369                // Workaround for Emoji keycap until we implement per-cluster font
370                // selection: if keycap is found in a different font that also
371                // supports previous char, attach previous char to the new run.
372                // Bug 7557244.
373                if (ch == KEYCAP && utf16Pos != 0 && family && family->getCoverage()->get(prevCh)) {
374                    const size_t prevChLength = U16_LENGTH(prevCh);
375                    run->end -= prevChLength;
376                    if (run->start == run->end) {
377                        result->pop_back();
378                    }
379                    start -= prevChLength;
380                }
381                Run dummy;
382                result->push_back(dummy);
383                run = &result->back();
384                if (family == NULL) {
385                    run->fakedFont.font = NULL;
386                } else {
387                    run->fakedFont = family->getClosestMatch(style);
388                }
389                lastFamily = family;
390                run->start = start;
391            }
392        }
393        prevCh = ch;
394        run->end = nextUtf16Pos;  // exclusive
395    } while (nextCh != kEndOfString);
396}
397
398MinikinFont* FontCollection::baseFont(FontStyle style) {
399    return baseFontFaked(style).font;
400}
401
402FakedFont FontCollection::baseFontFaked(FontStyle style) {
403    if (mFamilies.empty()) {
404        return FakedFont();
405    }
406    return mFamilies[0]->getClosestMatch(style);
407}
408
409uint32_t FontCollection::getId() const {
410    return mId;
411}
412
413void FontCollection::purgeFontFamilyHbFontCache() const {
414    assertMinikinLocked();
415    for (size_t i = 0; i < mFamilies.size(); ++i) {
416        mFamilies[i]->purgeHbFontCache();
417    }
418}
419
420}  // namespace android
421