/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka
17198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka#ifndef MINIKIN_FONT_LANGUAGE_H
18198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka#define MINIKIN_FONT_LANGUAGE_H
19198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka
#include <cstddef>
#include <cstdint>
#include <string>
#include <vector>

#include <hb.h>
24198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka
2514e2d136aaef271ba131f917cf5f27baa31ae5adSeigo Nonakanamespace minikin {
26198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka
// Due to the limits in font fallback score calculation, a language list can't
// hold more than 12 languages.
const size_t FONT_LANGUAGES_LIMIT = 12;

// Language and region codes are encoded into 15 bits. 0x7fff, the largest
// 15-bit value, marks a code as invalid (see FontLanguage::isUnsupported).
const uint16_t INVALID_CODE = 0x7fff;

class FontLanguages;
35f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka
36198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka// FontLanguage is a compact representation of a BCP 47 language tag. It
37198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka// does not capture all possible information, only what directly affects
38198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka// font rendering.
39198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonakastruct FontLanguage {
40198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonakapublic:
41e1d7f6168a0a485ecac75cfc9ae3bdc5143d0fb1yirui    enum EmojiStyle : uint8_t {
42e1d7f6168a0a485ecac75cfc9ae3bdc5143d0fb1yirui        EMSTYLE_EMPTY = 0,
43e1d7f6168a0a485ecac75cfc9ae3bdc5143d0fb1yirui        EMSTYLE_DEFAULT = 1,
44e1d7f6168a0a485ecac75cfc9ae3bdc5143d0fb1yirui        EMSTYLE_EMOJI = 2,
45e1d7f6168a0a485ecac75cfc9ae3bdc5143d0fb1yirui        EMSTYLE_TEXT = 3,
46e1d7f6168a0a485ecac75cfc9ae3bdc5143d0fb1yirui    };
47198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    // Default constructor creates the unsupported language.
48e1d7f6168a0a485ecac75cfc9ae3bdc5143d0fb1yirui    FontLanguage()
49e1d7f6168a0a485ecac75cfc9ae3bdc5143d0fb1yirui            : mScript(0ul),
5022462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka            mLanguage(INVALID_CODE),
5122462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka            mRegion(INVALID_CODE),
5222462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka            mHbLanguage(HB_LANGUAGE_INVALID),
53e1d7f6168a0a485ecac75cfc9ae3bdc5143d0fb1yirui            mSubScriptBits(0ul),
54e1d7f6168a0a485ecac75cfc9ae3bdc5143d0fb1yirui            mEmojiStyle(EMSTYLE_EMPTY) {}
55198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka
56198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    // Parse from string
57198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    FontLanguage(const char* buf, size_t length);
58198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka
59198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    bool operator==(const FontLanguage other) const {
60e1d7f6168a0a485ecac75cfc9ae3bdc5143d0fb1yirui        return !isUnsupported() && isEqualScript(other) && mLanguage == other.mLanguage &&
6122462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka                mRegion == other.mRegion && mEmojiStyle == other.mEmojiStyle;
62198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    }
63198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka
64198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    bool operator!=(const FontLanguage other) const {
65198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka        return !(*this == other);
66198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    }
67198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka
6822462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka    bool isUnsupported() const { return mLanguage == INVALID_CODE; }
69e1d7f6168a0a485ecac75cfc9ae3bdc5143d0fb1yirui    EmojiStyle getEmojiStyle() const { return mEmojiStyle; }
7022462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka    hb_language_t getHbLanguage() const { return mHbLanguage; }
7122462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka
72198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka
736f9966ea7c1910fd780cf7779cc59701c9b98a2bSeigo Nonaka    bool isEqualScript(const FontLanguage& other) const;
74198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka
75198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    // Returns true if this script supports the given script. For example, ja-Jpan supports Hira,
76198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    // ja-Hira doesn't support Jpan.
77198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    bool supportsHbScript(hb_script_t script) const;
78198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka
79198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    std::string getString() const;
80198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka
81f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka    // Calculates a matching score. This score represents how well the input languages cover this
82f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka    // language. The maximum score in the language list is returned.
836f9966ea7c1910fd780cf7779cc59701c9b98a2bSeigo Nonaka    // 0 = no match, 1 = script match, 2 = script and primary language match.
84f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka    int calcScoreFor(const FontLanguages& supported) const;
85198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka
86e1d7f6168a0a485ecac75cfc9ae3bdc5143d0fb1yirui    uint64_t getIdentifier() const {
8722462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka        return ((uint64_t)mLanguage << 49) | ((uint64_t)mScript << 17) | ((uint64_t)mRegion << 2) |
8822462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka                mEmojiStyle;
89e1d7f6168a0a485ecac75cfc9ae3bdc5143d0fb1yirui    }
90198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka
91198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonakaprivate:
92f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka    friend class FontLanguages;  // for FontLanguages constructor
93f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka
94198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    // ISO 15924 compliant script code. The 4 chars script code are packed into a 32 bit integer.
95198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    uint32_t mScript;
96198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka
97198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    // ISO 639-1 or ISO 639-2 compliant language code.
9822462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka    // The two- or three-letter language code is packed into a 15 bit integer.
99198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    // mLanguage = 0 means the FontLanguage is unsupported.
10022462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka    uint16_t mLanguage;
10122462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka
10222462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka    // ISO 3166-1 or UN M.49 compliant region code. The two-letter or three-digit region code is
10322462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka    // packed into a 15 bit integer.
10422462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka    uint16_t mRegion;
10522462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka
10622462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka    // The language to be passed HarfBuzz shaper.
10722462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka    hb_language_t mHbLanguage;
108198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka
109e1d7f6168a0a485ecac75cfc9ae3bdc5143d0fb1yirui    // For faster comparing, use 7 bits for specific scripts.
110533a01ea8438bb102b0dbc71f6c4ef356b260ed5Seigo Nonaka    static const uint8_t kBopomofoFlag = 1u;
111e1d7f6168a0a485ecac75cfc9ae3bdc5143d0fb1yirui    static const uint8_t kHanFlag = 1u << 1;
112e1d7f6168a0a485ecac75cfc9ae3bdc5143d0fb1yirui    static const uint8_t kHangulFlag = 1u << 2;
113e1d7f6168a0a485ecac75cfc9ae3bdc5143d0fb1yirui    static const uint8_t kHiraganaFlag = 1u << 3;
114e1d7f6168a0a485ecac75cfc9ae3bdc5143d0fb1yirui    static const uint8_t kKatakanaFlag = 1u << 4;
115e1d7f6168a0a485ecac75cfc9ae3bdc5143d0fb1yirui    static const uint8_t kSimplifiedChineseFlag = 1u << 5;
116e1d7f6168a0a485ecac75cfc9ae3bdc5143d0fb1yirui    static const uint8_t kTraditionalChineseFlag = 1u << 6;
117198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    uint8_t mSubScriptBits;
118198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka
119e1d7f6168a0a485ecac75cfc9ae3bdc5143d0fb1yirui    EmojiStyle mEmojiStyle;
120e1d7f6168a0a485ecac75cfc9ae3bdc5143d0fb1yirui
121198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    static uint8_t scriptToSubScriptBits(uint32_t script);
122f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka
12322462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka    static EmojiStyle resolveEmojiStyle(const char* buf, size_t length, uint32_t script);
12422462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka
125f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka    // Returns true if the provide subscript bits has the requested subscript bits.
126f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka    // Note that this function returns false if the requested subscript bits are empty.
127f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka    static bool supportsScript(uint8_t providedBits, uint8_t requestedBits);
128198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka};
129198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka
130f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka// An immutable list of languages.
131f3afe92def0fff022889fd036d68451223aac146Seigo Nonakaclass FontLanguages {
132f3afe92def0fff022889fd036d68451223aac146Seigo Nonakapublic:
13347b905f6840ea6776d6f6778915e7408a1ff8be4Chih-Hung Hsieh    explicit FontLanguages(std::vector<FontLanguage>&& languages);
134f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka    FontLanguages() : mUnionOfSubScriptBits(0), mIsAllTheSameLanguage(false) {}
135f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka    FontLanguages(FontLanguages&&) = default;
136f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka
137f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka    size_t size() const { return mLanguages.size(); }
138f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka    bool empty() const { return mLanguages.empty(); }
139f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka    const FontLanguage& operator[] (size_t n) const { return mLanguages[n]; }
140f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka
141f3afe92def0fff022889fd036d68451223aac146Seigo Nonakaprivate:
142f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka    friend struct FontLanguage;  // for calcScoreFor
143f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka
144f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka    std::vector<FontLanguage> mLanguages;
145f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka    uint8_t mUnionOfSubScriptBits;
146f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka    bool mIsAllTheSameLanguage;
147f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka
148f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka    uint8_t getUnionOfSubScriptBits() const { return mUnionOfSubScriptBits; }
149f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka    bool isAllTheSameLanguage() const { return mIsAllTheSameLanguage; }
150f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka
151f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka    // Do not copy and assign.
152f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka    FontLanguages(const FontLanguages&) = delete;
153f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka    void operator=(const FontLanguages&) = delete;
154f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka};
155198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka
15614e2d136aaef271ba131f917cf5f27baa31ae5adSeigo Nonaka}  // namespace minikin
157198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka
158198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka#endif  // MINIKIN_FONT_LANGUAGE_H
159