1198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka/*
2198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka * Copyright (C) 2015 The Android Open Source Project
3198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka *
4198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka * Licensed under the Apache License, Version 2.0 (the "License");
5198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka * you may not use this file except in compliance with the License.
6198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka * You may obtain a copy of the License at
7198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka *
8198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka *      http://www.apache.org/licenses/LICENSE-2.0
9198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka *
10198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka * Unless required by applicable law or agreed to in writing, software
11198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka * distributed under the License is distributed on an "AS IS" BASIS,
12198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka * See the License for the specific language governing permissions and
14198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka * limitations under the License.
15198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka */
16198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka
17198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka#define LOG_TAG "Minikin"
18198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka
19198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka#include "FontLanguage.h"
20198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka
21198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka#include <hb.h>
22198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka#include <unicode/uloc.h>
23198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka
24198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonakanamespace android {
25198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka
// Packs four script-tag characters into a single uint32_t, with the first character in the most
// significant byte. Every argument is narrowed to uint8_t before it is widened so that a negative
// `char` (a byte >= 0x80 where char is signed) cannot sign-extend into the neighbouring bytes.
// The expansion is a constant expression for constant arguments, so it is usable in case labels
// and static_assert.
#define SCRIPT_TAG(c1, c2, c3, c4) \
        ((static_cast<uint32_t>(static_cast<uint8_t>(c1)) << 24) | \
         (static_cast<uint32_t>(static_cast<uint8_t>(c2)) << 16) | \
         (static_cast<uint32_t>(static_cast<uint8_t>(c3)) <<  8) | \
         (static_cast<uint32_t>(static_cast<uint8_t>(c4))))
29198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka
30198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka// Parse BCP 47 language identifier into internal structure
31198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo NonakaFontLanguage::FontLanguage(const char* buf, size_t length) : FontLanguage() {
32198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    size_t i;
33198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    for (i = 0; i < length; i++) {
34198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka        char c = buf[i];
35198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka        if (c == '-' || c == '_') break;
36198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    }
37198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    if (i == 2 || i == 3) {  // only accept two or three letter language code.
38198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka        mLanguage = buf[0] | (buf[1] << 8) | ((i == 3) ? (buf[2] << 16) : 0);
39198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    } else {
40198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka        // We don't understand anything other than two-letter or three-letter
41198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka        // language codes, so we skip parsing the rest of the string.
42198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka        mLanguage = 0ul;
43198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka        return;
44198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    }
45198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka
46198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    size_t next;
47198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    for (i++; i < length; i = next + 1) {
48198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka        for (next = i; next < length; next++) {
49198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka            char c = buf[next];
50198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka            if (c == '-' || c == '_') break;
51198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka        }
52198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka        if (next - i == 4 && 'A' <= buf[i] && buf[i] <= 'Z') {
53198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka            mScript = SCRIPT_TAG(buf[i], buf[i + 1], buf[i + 2], buf[i + 3]);
54198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka        }
55198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    }
56198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka
57198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    mSubScriptBits = scriptToSubScriptBits(mScript);
58198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka}
59198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka
60198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka//static
61198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonakauint8_t FontLanguage::scriptToSubScriptBits(uint32_t script) {
62198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    uint8_t subScriptBits = 0u;
63198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    switch (script) {
64533a01ea8438bb102b0dbc71f6c4ef356b260ed5Seigo Nonaka        case SCRIPT_TAG('B', 'o', 'p', 'o'):
65533a01ea8438bb102b0dbc71f6c4ef356b260ed5Seigo Nonaka            subScriptBits = kBopomofoFlag;
66533a01ea8438bb102b0dbc71f6c4ef356b260ed5Seigo Nonaka            break;
67198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka        case SCRIPT_TAG('H', 'a', 'n', 'g'):
68198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka            subScriptBits = kHangulFlag;
69198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka            break;
70533a01ea8438bb102b0dbc71f6c4ef356b260ed5Seigo Nonaka        case SCRIPT_TAG('H', 'a', 'n', 'b'):
71533a01ea8438bb102b0dbc71f6c4ef356b260ed5Seigo Nonaka            // Bopomofo is almost exclusively used in Taiwan.
72533a01ea8438bb102b0dbc71f6c4ef356b260ed5Seigo Nonaka            subScriptBits = kHanFlag | kBopomofoFlag;
73533a01ea8438bb102b0dbc71f6c4ef356b260ed5Seigo Nonaka            break;
74198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka        case SCRIPT_TAG('H', 'a', 'n', 'i'):
75198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka            subScriptBits = kHanFlag;
76198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka            break;
77198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka        case SCRIPT_TAG('H', 'a', 'n', 's'):
78198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka            subScriptBits = kHanFlag | kSimplifiedChineseFlag;
79198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka            break;
80198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka        case SCRIPT_TAG('H', 'a', 'n', 't'):
81198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka            subScriptBits = kHanFlag | kTraditionalChineseFlag;
82198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka            break;
83198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka        case SCRIPT_TAG('H', 'i', 'r', 'a'):
84198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka            subScriptBits = kHiraganaFlag;
85198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka            break;
86198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka        case SCRIPT_TAG('H', 'r', 'k', 't'):
87198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka            subScriptBits = kKatakanaFlag | kHiraganaFlag;
88198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka            break;
89198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka        case SCRIPT_TAG('J', 'p', 'a', 'n'):
90198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka            subScriptBits = kHanFlag | kKatakanaFlag | kHiraganaFlag;
91198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka            break;
92198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka        case SCRIPT_TAG('K', 'a', 'n', 'a'):
93198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka            subScriptBits = kKatakanaFlag;
94198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka            break;
95198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka        case SCRIPT_TAG('K', 'o', 'r', 'e'):
96198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka            subScriptBits = kHanFlag | kHangulFlag;
97198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka            break;
98d3376522332e1e016e59fabb22c24025092c724dSeigo Nonaka        case SCRIPT_TAG('Z', 's', 'y', 'e'):
99198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka            subScriptBits = kEmojiFlag;
100198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka            break;
101198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    }
102198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    return subScriptBits;
103198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka}
104198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka
105198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonakastd::string FontLanguage::getString() const {
106198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    if (mLanguage == 0ul) {
107198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka        return "und";
108198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    }
109198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    char buf[16];
110198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    size_t i = 0;
111198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    buf[i++] = mLanguage & 0xFF ;
112198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    buf[i++] = (mLanguage >> 8) & 0xFF;
113198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    char third_letter = (mLanguage >> 16) & 0xFF;
114198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    if (third_letter != 0) buf[i++] = third_letter;
115198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    if (mScript != 0) {
116198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka      buf[i++] = '-';
117198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka      buf[i++] = (mScript >> 24) & 0xFFu;
118198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka      buf[i++] = (mScript >> 16) & 0xFFu;
119198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka      buf[i++] = (mScript >> 8) & 0xFFu;
120198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka      buf[i++] = mScript & 0xFFu;
121198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    }
122198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    return std::string(buf, i);
123198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka}
124198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka
1256f9966ea7c1910fd780cf7779cc59701c9b98a2bSeigo Nonakabool FontLanguage::isEqualScript(const FontLanguage& other) const {
126198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    return other.mScript == mScript;
127198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka}
128198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka
129f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka// static
130f3afe92def0fff022889fd036d68451223aac146Seigo Nonakabool FontLanguage::supportsScript(uint8_t providedBits, uint8_t requestedBits) {
131f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka    return requestedBits != 0 && (providedBits & requestedBits) == requestedBits;
1326f9966ea7c1910fd780cf7779cc59701c9b98a2bSeigo Nonaka}
1336f9966ea7c1910fd780cf7779cc59701c9b98a2bSeigo Nonaka
134198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonakabool FontLanguage::supportsHbScript(hb_script_t script) const {
135198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    static_assert(SCRIPT_TAG('J', 'p', 'a', 'n') == HB_TAG('J', 'p', 'a', 'n'),
136198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka                  "The Minikin script and HarfBuzz hb_script_t have different encodings.");
137198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    if (script == mScript) return true;
138f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka    return supportsScript(mSubScriptBits, scriptToSubScriptBits(script));
139198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka}
140198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka
141f3afe92def0fff022889fd036d68451223aac146Seigo Nonakaint FontLanguage::calcScoreFor(const FontLanguages& supported) const {
142f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka    int score = 0;
143f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka    for (size_t i = 0; i < supported.size(); ++i) {
144f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka        if (isEqualScript(supported[i]) ||
145f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka                supportsScript(supported[i].mSubScriptBits, mSubScriptBits)) {
146f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka            if (mLanguage == supported[i].mLanguage) {
147f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka                return 2;
148f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka            } else {
149f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka                score = 1;
150f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka            }
151f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka        }
152f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka    }
153f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka
154f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka    if (score == 1) {
155f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka        return score;
156f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka    }
157f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka
158f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka    if (supportsScript(supported.getUnionOfSubScriptBits(), mSubScriptBits)) {
159f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka        // Gives score of 2 only if the language matches all of the font languages except for the
160f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka        // exact match case handled above.
161f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka        return (mLanguage == supported[0].mLanguage && supported.isAllTheSameLanguage()) ? 2 : 1;
162f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka    }
163f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka
164f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka    return 0;
165f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka}
166f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka
167f3afe92def0fff022889fd036d68451223aac146Seigo NonakaFontLanguages::FontLanguages(std::vector<FontLanguage>&& languages)
168f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka    : mLanguages(std::move(languages)) {
169f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka    if (mLanguages.empty()) {
170f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka        return;
171f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka    }
172f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka
173f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka    const FontLanguage& lang = mLanguages[0];
174f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka
175f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka    mIsAllTheSameLanguage = true;
176f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka    mUnionOfSubScriptBits = lang.mSubScriptBits;
177f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka    for (size_t i = 1; i < mLanguages.size(); ++i) {
178f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka        mUnionOfSubScriptBits |= mLanguages[i].mSubScriptBits;
179f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka        if (mIsAllTheSameLanguage && lang.mLanguage != mLanguages[i].mLanguage) {
180f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka            mIsAllTheSameLanguage = false;
181f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka        }
1826f9966ea7c1910fd780cf7779cc59701c9b98a2bSeigo Nonaka    }
183198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka}
184198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka
185198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka#undef SCRIPT_TAG
186198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka}  // namespace android
187