FontCollection.cpp revision 6b1c227da6492a435f0341d7fe95d9992669920e
1/*
2 * Copyright (C) 2013 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17// #define VERBOSE_DEBUG
18
19#define LOG_TAG "Minikin"
20#include <cutils/log.h>
21#include <algorithm>
22
23#include "unicode/unistr.h"
24#include "unicode/unorm2.h"
25
26#include "FontLanguage.h"
27#include "FontLanguageListCache.h"
28#include "MinikinInternal.h"
29#include <minikin/FontCollection.h>
30
31using std::vector;
32
33namespace android {
34
// Returns the larger of |a| and |b|. When neither is greater, |b| is returned.
template <typename T>
static inline T max(T a, T b) {
    if (a > b) {
        return a;
    }
    return b;
}
39
40uint32_t FontCollection::sNextId = 0;
41
42FontCollection::FontCollection(const vector<FontFamily*>& typefaces) :
43    mMaxChar(0) {
44    AutoMutex _l(gMinikinLock);
45    mId = sNextId++;
46    vector<uint32_t> lastChar;
47    size_t nTypefaces = typefaces.size();
48#ifdef VERBOSE_DEBUG
49    ALOGD("nTypefaces = %zd\n", nTypefaces);
50#endif
51    const FontStyle defaultStyle;
52    for (size_t i = 0; i < nTypefaces; i++) {
53        FontFamily* family = typefaces[i];
54        MinikinFont* typeface = family->getClosestMatch(defaultStyle).font;
55        if (typeface == NULL) {
56            continue;
57        }
58        family->RefLocked();
59        const SparseBitSet* coverage = family->getCoverage();
60        if (coverage == nullptr) {
61            family->UnrefLocked();
62            continue;
63        }
64        mFamilies.push_back(family);  // emplace_back would be better
65        if (family->hasVSTable()) {
66            mVSFamilyVec.push_back(family);
67        }
68        mMaxChar = max(mMaxChar, coverage->length());
69        lastChar.push_back(coverage->nextSetBit(0));
70    }
71    nTypefaces = mFamilies.size();
72    LOG_ALWAYS_FATAL_IF(nTypefaces == 0,
73        "Font collection must have at least one valid typeface");
74    size_t nPages = (mMaxChar + kPageMask) >> kLogCharsPerPage;
75    size_t offset = 0;
76    // TODO: Use variation selector map for mRanges construction.
77    // A font can have a glyph for a base code point and variation selector pair but no glyph for
78    // the base code point without variation selector. The family won't be listed in the range in
79    // this case.
80    for (size_t i = 0; i < nPages; i++) {
81        Range dummy;
82        mRanges.push_back(dummy);
83        Range* range = &mRanges.back();
84#ifdef VERBOSE_DEBUG
85        ALOGD("i=%zd: range start = %zd\n", i, offset);
86#endif
87        range->start = offset;
88        for (size_t j = 0; j < nTypefaces; j++) {
89            if (lastChar[j] < (i + 1) << kLogCharsPerPage) {
90                FontFamily* family = mFamilies[j];
91                mFamilyVec.push_back(family);
92                offset++;
93                uint32_t nextChar = family->getCoverage()->nextSetBit((i + 1) << kLogCharsPerPage);
94#ifdef VERBOSE_DEBUG
95                ALOGD("nextChar = %d (j = %zd)\n", nextChar, j);
96#endif
97                lastChar[j] = nextChar;
98            }
99        }
100        range->end = offset;
101    }
102}
103
104FontCollection::~FontCollection() {
105    for (size_t i = 0; i < mFamilies.size(); i++) {
106        mFamilies[i]->UnrefLocked();
107    }
108}
109
// Sentinel scores used by the font fallback.
//  - kUnsupportedFontScore is the lowest possible score.
//  - kFirstFontScore is the highest possible score.
const uint32_t kUnsupportedFontScore = 0u;
const uint32_t kFirstFontScore = UINT32_MAX;
113
114// Calculates a font score.
115// The score of the font family is based on three subscores.
116//  - Coverage Score: How well the font family covers the given character or variation sequence.
117//  - Language Score: How well the font family is appropriate for the language.
118//  - Variant Score: Whether the font family matches the variant. Note that this variant is not the
119//    one in BCP47. This is our own font variant (e.g., elegant, compact).
120//
121// Then, there is a priority for these three subscores as follow:
122//   Coverage Score > Language Score > Variant Score
123// The returned score reflects this priority order.
124//
125// Note that there are two special scores.
126//  - kUnsupportedFontScore: When the font family doesn't support the variation sequence or even its
127//    base character.
128//  - kFirstFontScore: When the font is the first font family in the collection and it supports the
129//    given character or variation sequence.
130uint32_t FontCollection::calcFamilyScore(uint32_t ch, uint32_t vs, int variant, uint32_t langListId,
131                                        FontFamily* fontFamily) const {
132
133    const uint32_t coverageScore = calcCoverageScore(ch, vs, fontFamily);
134    if (coverageScore == kFirstFontScore || coverageScore == kUnsupportedFontScore) {
135        // No need to calculate other scores.
136        return coverageScore;
137    }
138
139    const uint32_t languageScore = calcLanguageMatchingScore(langListId, *fontFamily);
140    const uint32_t variantScore = calcVariantMatchingScore(variant, *fontFamily);
141
142    // Subscores are encoded into 31 bits representation to meet the subscore priority.
143    // The highest 2 bits are for coverage score, then following 28 bits are for language score,
144    // then the last 1 bit is for variant score.
145    return coverageScore << 29 | languageScore << 1 | variantScore;
146}
147
148// Calculates a font score based on variation sequence coverage.
149// - Returns kUnsupportedFontScore if the font doesn't support the variation sequence or its base
150//   character.
151// - Returns kFirstFontScore if the font family is the first font family in the collection and it
152//   supports the given character or variation sequence.
153// - Returns 3 if the font family supports the variation sequence.
154// - Returns 2 if the vs is a color variation selector (U+FE0F) and if the font is an emoji font.
155// - Returns 2 if the vs is a text variation selector (U+FE0E) and if the font is not an emoji font.
156// - Returns 1 if the variation selector is not specified or if the font family only supports the
157//   variation sequence's base character.
158uint32_t FontCollection::calcCoverageScore(uint32_t ch, uint32_t vs, FontFamily* fontFamily) const {
159    const bool hasVSGlyph = (vs != 0) && fontFamily->hasVariationSelector(ch, vs);
160    if (!hasVSGlyph && !fontFamily->getCoverage()->get(ch)) {
161        // The font doesn't support either variation sequence or even the base character.
162        return kUnsupportedFontScore;
163    }
164
165    if ((vs == 0 || hasVSGlyph) && mFamilies[0] == fontFamily) {
166        // If the first font family supports the given character or variation sequence, always use
167        // it.
168        return kFirstFontScore;
169    }
170
171    if (vs == 0) {
172        return 1;
173    }
174
175    if (hasVSGlyph) {
176        return 3;
177    }
178
179    if (vs == 0xFE0F || vs == 0xFE0E) {
180        // TODO use all language in the list.
181        const FontLanguage lang = FontLanguageListCache::getById(fontFamily->langId())[0];
182        const bool hasEmojiFlag = lang.hasEmojiFlag();
183        if (vs == 0xFE0F) {
184            return hasEmojiFlag ? 2 : 1;
185        } else {  // vs == 0xFE0E
186            return hasEmojiFlag ? 1 : 2;
187        }
188    }
189    return 1;
190}
191
192// Calculates font scores based on the script matching and primary langauge matching.
193//
194// If the font's script doesn't support the requested script, the font gets a score of 0. If the
195// font's script supports the requested script and the font has the same primary language as the
196// requested one, the font gets a score of 2. If the font's script supports the requested script
197// but the primary language is different from the requested one, the font gets a score of 1.
198//
199// If two languages in the requested list have the same language score, the font matching with
200// higher priority language gets a higher score. For example, in the case the user requested
201// language list is "ja-Jpan,en-Latn". The score of for the font of "ja-Jpan" gets a higher score
202// than the font of "en-Latn".
203//
204// To achieve the above two conditions, the language score is determined as follows:
205//   LanguageScore = s(0) * 3^(m - 1) + s(1) * 3^(m - 2) + ... + s(m - 2) * 3 + s(m - 1)
206// Here, m is the maximum number of languages to be compared, and s(i) is the i-th language's
207// matching score. The possible values of s(i) are 0, 1 and 2.
208uint32_t FontCollection::calcLanguageMatchingScore(
209        uint32_t userLangListId, const FontFamily& fontFamily) {
210    const FontLanguages& langList = FontLanguageListCache::getById(userLangListId);
211    // TODO use all language in the list.
212    FontLanguage fontLanguage = FontLanguageListCache::getById(fontFamily.langId())[0];
213
214    const size_t maxCompareNum = std::min(langList.size(), FONT_LANGUAGES_LIMIT);
215    uint32_t score = fontLanguage.getScoreFor(langList[0]);  // maxCompareNum can't be zero.
216    for (size_t i = 1; i < maxCompareNum; ++i) {
217        score = score * 3u + fontLanguage.getScoreFor(langList[i]);
218    }
219    return score;
220}
221
222// Calculates a font score based on variant ("compact" or "elegant") matching.
223//  - Returns 1 if the font doesn't have variant or the variant matches with the text style.
224//  - No score if the font has a variant but it doesn't match with the text style.
225uint32_t FontCollection::calcVariantMatchingScore(int variant, const FontFamily& fontFamily) {
226    return (fontFamily.variant() == 0 || fontFamily.variant() == variant) ? 1 : 0;
227}
228
229// Implement heuristic for choosing best-match font. Here are the rules:
230// 1. If first font in the collection has the character, it wins.
231// 2. Calculate a score for the font family. See comments in calcFamilyScore for the detail.
232// 3. Highest score wins, with ties resolved to the first font.
233FontFamily* FontCollection::getFamilyForChar(uint32_t ch, uint32_t vs,
234            uint32_t langListId, int variant) const {
235    if (ch >= mMaxChar) {
236        return NULL;
237    }
238
239    const std::vector<FontFamily*>* familyVec = &mFamilyVec;
240    Range range = mRanges[ch >> kLogCharsPerPage];
241
242    std::vector<FontFamily*> familyVecForVS;
243    if (vs != 0) {
244        // If variation selector is specified, need to search for both the variation sequence and
245        // its base codepoint. Compute the union vector of them.
246        familyVecForVS = mVSFamilyVec;
247        familyVecForVS.insert(familyVecForVS.end(),
248                mFamilyVec.begin() + range.start, mFamilyVec.begin() + range.end);
249        std::sort(familyVecForVS.begin(), familyVecForVS.end());
250        auto last = std::unique(familyVecForVS.begin(), familyVecForVS.end());
251        familyVecForVS.erase(last, familyVecForVS.end());
252
253        familyVec = &familyVecForVS;
254        range = { 0, familyVecForVS.size() };
255    }
256
257#ifdef VERBOSE_DEBUG
258    ALOGD("querying range %zd:%zd\n", range.start, range.end);
259#endif
260    FontFamily* bestFamily = nullptr;
261    uint32_t bestScore = kUnsupportedFontScore;
262    for (size_t i = range.start; i < range.end; i++) {
263        FontFamily* family = (*familyVec)[i];
264        const uint32_t score = calcFamilyScore(ch, vs, variant, langListId, family);
265        if (score == kFirstFontScore) {
266            // If the first font family supports the given character or variation sequence, always
267            // use it.
268            return family;
269        }
270        if (score > bestScore) {
271            bestScore = score;
272            bestFamily = family;
273        }
274    }
275    if (bestFamily == nullptr && !mFamilyVec.empty()) {
276        UErrorCode errorCode = U_ZERO_ERROR;
277        const UNormalizer2* normalizer = unorm2_getNFDInstance(&errorCode);
278        if (U_SUCCESS(errorCode)) {
279            UChar decomposed[4];
280            int len = unorm2_getRawDecomposition(normalizer, ch, decomposed, 4, &errorCode);
281            if (U_SUCCESS(errorCode) && len > 0) {
282                int off = 0;
283                U16_NEXT_UNSAFE(decomposed, off, ch);
284                return getFamilyForChar(ch, vs, langListId, variant);
285            }
286        }
287        bestFamily = mFamilies[0];
288    }
289    return bestFamily;
290}
291
// Named code points used by the itemization heuristics.
const uint32_t NBSP = 0xa0;        // U+00A0 NO-BREAK SPACE
const uint32_t ZWNJ = 0x200c;      // U+200C ZERO WIDTH NON-JOINER
const uint32_t ZWJ = 0x200d;       // U+200D ZERO WIDTH JOINER
const uint32_t KEYCAP = 0x20e3;    // U+20E3 COMBINING ENCLOSING KEYCAP
const uint32_t HYPHEN = 0x2010;    // U+2010 HYPHEN
const uint32_t NB_HYPHEN = 0x2011; // U+2011 NON-BREAKING HYPHEN
298
299// Characters where we want to continue using existing font run instead of
300// recomputing the best match in the fallback list.
301static const uint32_t stickyWhitelist[] = { '!', ',', '-', '.', ':', ';', '?', NBSP, ZWJ, ZWNJ,
302        KEYCAP, HYPHEN, NB_HYPHEN };
303
304static bool isStickyWhitelisted(uint32_t c) {
305    for (size_t i = 0; i < sizeof(stickyWhitelist) / sizeof(stickyWhitelist[0]); i++) {
306        if (stickyWhitelist[i] == c) return true;
307    }
308    return false;
309}
310
// Returns true if |c| lies in U+FE00..U+FE0F or U+E0100..U+E01EF (both ranges inclusive).
static bool isVariationSelector(uint32_t c) {
    if (c >= 0xFE00 && c <= 0xFE0F) {
        return true;
    }
    return c >= 0xE0100 && c <= 0xE01EF;
}
314
315bool FontCollection::hasVariationSelector(uint32_t baseCodepoint,
316        uint32_t variationSelector) const {
317    if (!isVariationSelector(variationSelector)) {
318        return false;
319    }
320    if (baseCodepoint >= mMaxChar) {
321        return false;
322    }
323    if (variationSelector == 0) {
324        return false;
325    }
326
327    // Currently mRanges can not be used here since it isn't aware of the variation sequence.
328    for (size_t i = 0; i < mVSFamilyVec.size(); i++) {
329        AutoMutex _l(gMinikinLock);
330        if (mVSFamilyVec[i]->hasVariationSelector(baseCodepoint, variationSelector)) {
331            return true;
332        }
333    }
334    return false;
335}
336
337void FontCollection::itemize(const uint16_t *string, size_t string_size, FontStyle style,
338        vector<Run>* result) const {
339    const uint32_t langListId = style.getLanguageListId();
340    int variant = style.getVariant();
341    FontFamily* lastFamily = NULL;
342    Run* run = NULL;
343
344    if (string_size == 0) {
345        return;
346    }
347
348    const uint32_t kEndOfString = 0xFFFFFFFF;
349
350    uint32_t nextCh = 0;
351    uint32_t prevCh = 0;
352    size_t nextUtf16Pos = 0;
353    size_t readLength = 0;
354    U16_NEXT(string, readLength, string_size, nextCh);
355
356    do {
357        const uint32_t ch = nextCh;
358        const size_t utf16Pos = nextUtf16Pos;
359        nextUtf16Pos = readLength;
360        if (readLength < string_size) {
361            U16_NEXT(string, readLength, string_size, nextCh);
362        } else {
363            nextCh = kEndOfString;
364        }
365
366        bool shouldContinueRun = false;
367        if (lastFamily != nullptr) {
368            if (isStickyWhitelisted(ch)) {
369                // Continue using existing font as long as it has coverage and is whitelisted
370                shouldContinueRun = lastFamily->getCoverage()->get(ch);
371            } else if (isVariationSelector(ch)) {
372                // Always continue if the character is a variation selector.
373                shouldContinueRun = true;
374            }
375        }
376
377        if (!shouldContinueRun) {
378            FontFamily* family = getFamilyForChar(ch, isVariationSelector(nextCh) ? nextCh : 0,
379                    langListId, variant);
380            if (utf16Pos == 0 || family != lastFamily) {
381                size_t start = utf16Pos;
382                // Workaround for Emoji keycap until we implement per-cluster font
383                // selection: if keycap is found in a different font that also
384                // supports previous char, attach previous char to the new run.
385                // Bug 7557244.
386                if (ch == KEYCAP && utf16Pos != 0 && family && family->getCoverage()->get(prevCh)) {
387                    const size_t prevChLength = U16_LENGTH(prevCh);
388                    run->end -= prevChLength;
389                    if (run->start == run->end) {
390                        result->pop_back();
391                    }
392                    start -= prevChLength;
393                }
394                Run dummy;
395                result->push_back(dummy);
396                run = &result->back();
397                if (family == NULL) {
398                    run->fakedFont.font = NULL;
399                } else {
400                    run->fakedFont = family->getClosestMatch(style);
401                }
402                lastFamily = family;
403                run->start = start;
404            }
405        }
406        prevCh = ch;
407        run->end = nextUtf16Pos;  // exclusive
408    } while (nextCh != kEndOfString);
409}
410
411MinikinFont* FontCollection::baseFont(FontStyle style) {
412    return baseFontFaked(style).font;
413}
414
415FakedFont FontCollection::baseFontFaked(FontStyle style) {
416    if (mFamilies.empty()) {
417        return FakedFont();
418    }
419    return mFamilies[0]->getClosestMatch(style);
420}
421
422uint32_t FontCollection::getId() const {
423    return mId;
424}
425
426}  // namespace android
427