1/*
2 * Copyright (C) 2015 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#ifndef MINIKIN_FONT_LANGUAGE_H
18#define MINIKIN_FONT_LANGUAGE_H
19
20#include <string>
21#include <vector>
22
23#include <hb.h>
24
25namespace minikin {
26
// Upper bound on the number of languages that can be used together. The font fallback score
// calculation cannot handle anything more than 12 languages.
constexpr size_t FONT_LANGUAGES_LIMIT = 12;

// Marker for an invalid language or region code. Language and region codes are encoded to
// 15 bits; this value has all of those 15 bits set.
constexpr uint16_t INVALID_CODE = 0x7fff;

// Forward declaration; FontLanguage refers to this class before it is defined.
class FontLanguages;
35
36// FontLanguage is a compact representation of a BCP 47 language tag. It
37// does not capture all possible information, only what directly affects
38// font rendering.
39struct FontLanguage {
40public:
41    enum EmojiStyle : uint8_t {
42        EMSTYLE_EMPTY = 0,
43        EMSTYLE_DEFAULT = 1,
44        EMSTYLE_EMOJI = 2,
45        EMSTYLE_TEXT = 3,
46    };
47    // Default constructor creates the unsupported language.
48    FontLanguage()
49            : mScript(0ul),
50            mLanguage(INVALID_CODE),
51            mRegion(INVALID_CODE),
52            mHbLanguage(HB_LANGUAGE_INVALID),
53            mSubScriptBits(0ul),
54            mEmojiStyle(EMSTYLE_EMPTY) {}
55
56    // Parse from string
57    FontLanguage(const char* buf, size_t length);
58
59    bool operator==(const FontLanguage other) const {
60        return !isUnsupported() && isEqualScript(other) && mLanguage == other.mLanguage &&
61                mRegion == other.mRegion && mEmojiStyle == other.mEmojiStyle;
62    }
63
64    bool operator!=(const FontLanguage other) const {
65        return !(*this == other);
66    }
67
68    bool isUnsupported() const { return mLanguage == INVALID_CODE; }
69    EmojiStyle getEmojiStyle() const { return mEmojiStyle; }
70    hb_language_t getHbLanguage() const { return mHbLanguage; }
71
72
73    bool isEqualScript(const FontLanguage& other) const;
74
75    // Returns true if this script supports the given script. For example, ja-Jpan supports Hira,
76    // ja-Hira doesn't support Jpan.
77    bool supportsHbScript(hb_script_t script) const;
78
79    std::string getString() const;
80
81    // Calculates a matching score. This score represents how well the input languages cover this
82    // language. The maximum score in the language list is returned.
83    // 0 = no match, 1 = script match, 2 = script and primary language match.
84    int calcScoreFor(const FontLanguages& supported) const;
85
86    uint64_t getIdentifier() const {
87        return ((uint64_t)mLanguage << 49) | ((uint64_t)mScript << 17) | ((uint64_t)mRegion << 2) |
88                mEmojiStyle;
89    }
90
91private:
92    friend class FontLanguages;  // for FontLanguages constructor
93
94    // ISO 15924 compliant script code. The 4 chars script code are packed into a 32 bit integer.
95    uint32_t mScript;
96
97    // ISO 639-1 or ISO 639-2 compliant language code.
98    // The two- or three-letter language code is packed into a 15 bit integer.
99    // mLanguage = 0 means the FontLanguage is unsupported.
100    uint16_t mLanguage;
101
102    // ISO 3166-1 or UN M.49 compliant region code. The two-letter or three-digit region code is
103    // packed into a 15 bit integer.
104    uint16_t mRegion;
105
106    // The language to be passed HarfBuzz shaper.
107    hb_language_t mHbLanguage;
108
109    // For faster comparing, use 7 bits for specific scripts.
110    static const uint8_t kBopomofoFlag = 1u;
111    static const uint8_t kHanFlag = 1u << 1;
112    static const uint8_t kHangulFlag = 1u << 2;
113    static const uint8_t kHiraganaFlag = 1u << 3;
114    static const uint8_t kKatakanaFlag = 1u << 4;
115    static const uint8_t kSimplifiedChineseFlag = 1u << 5;
116    static const uint8_t kTraditionalChineseFlag = 1u << 6;
117    uint8_t mSubScriptBits;
118
119    EmojiStyle mEmojiStyle;
120
121    static uint8_t scriptToSubScriptBits(uint32_t script);
122
123    static EmojiStyle resolveEmojiStyle(const char* buf, size_t length, uint32_t script);
124
125    // Returns true if the provide subscript bits has the requested subscript bits.
126    // Note that this function returns false if the requested subscript bits are empty.
127    static bool supportsScript(uint8_t providedBits, uint8_t requestedBits);
128};
129
130// An immutable list of languages.
131class FontLanguages {
132public:
133    explicit FontLanguages(std::vector<FontLanguage>&& languages);
134    FontLanguages() : mUnionOfSubScriptBits(0), mIsAllTheSameLanguage(false) {}
135    FontLanguages(FontLanguages&&) = default;
136
137    size_t size() const { return mLanguages.size(); }
138    bool empty() const { return mLanguages.empty(); }
139    const FontLanguage& operator[] (size_t n) const { return mLanguages[n]; }
140
141private:
142    friend struct FontLanguage;  // for calcScoreFor
143
144    std::vector<FontLanguage> mLanguages;
145    uint8_t mUnionOfSubScriptBits;
146    bool mIsAllTheSameLanguage;
147
148    uint8_t getUnionOfSubScriptBits() const { return mUnionOfSubScriptBits; }
149    bool isAllTheSameLanguage() const { return mIsAllTheSameLanguage; }
150
151    // Do not copy and assign.
152    FontLanguages(const FontLanguages&) = delete;
153    void operator=(const FontLanguages&) = delete;
154};
155
156}  // namespace minikin
157
158#endif  // MINIKIN_FONT_LANGUAGE_H
159