FontLanguage.cpp revision 533a01ea8438bb102b0dbc71f6c4ef356b260ed5
1/*
2 * Copyright (C) 2015 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#define LOG_TAG "Minikin"
18
19#include "FontLanguage.h"
20
21#include <hb.h>
22#include <unicode/uloc.h>
23
24namespace android {
25
// Packs four 8-bit characters into one 32-bit tag, with c1 in the most significant byte and
// c4 in the least significant byte.
//
// Every argument is masked to its low 8 bits before shifting. Without the mask, a negative
// char (a byte >= 0x80 on platforms where char is signed) would be sign-extended by the
// conversion to uint32_t and would overwrite the bytes above it in the tag.
//
// The expansion is a constant expression when the arguments are, so it can be used in case
// labels and static_assert.
#define SCRIPT_TAG(c1, c2, c3, c4) \
        ((static_cast<uint32_t>(c1) & 0xFFu) << 24 | (static_cast<uint32_t>(c2) & 0xFFu) << 16 | \
         (static_cast<uint32_t>(c3) & 0xFFu) <<  8 | (static_cast<uint32_t>(c4) & 0xFFu))
29
30// Parse BCP 47 language identifier into internal structure
31FontLanguage::FontLanguage(const char* buf, size_t length) : FontLanguage() {
32    size_t i;
33    for (i = 0; i < length; i++) {
34        char c = buf[i];
35        if (c == '-' || c == '_') break;
36    }
37    if (i == 2 || i == 3) {  // only accept two or three letter language code.
38        mLanguage = buf[0] | (buf[1] << 8) | ((i == 3) ? (buf[2] << 16) : 0);
39    } else {
40        // We don't understand anything other than two-letter or three-letter
41        // language codes, so we skip parsing the rest of the string.
42        mLanguage = 0ul;
43        return;
44    }
45
46    size_t next;
47    for (i++; i < length; i = next + 1) {
48        for (next = i; next < length; next++) {
49            char c = buf[next];
50            if (c == '-' || c == '_') break;
51        }
52        if (next - i == 4 && 'A' <= buf[i] && buf[i] <= 'Z') {
53            mScript = SCRIPT_TAG(buf[i], buf[i + 1], buf[i + 2], buf[i + 3]);
54        }
55    }
56
57    mSubScriptBits = scriptToSubScriptBits(mScript);
58}
59
60//static
61uint8_t FontLanguage::scriptToSubScriptBits(uint32_t script) {
62    uint8_t subScriptBits = 0u;
63    switch (script) {
64        case SCRIPT_TAG('B', 'o', 'p', 'o'):
65            subScriptBits = kBopomofoFlag;
66            break;
67        case SCRIPT_TAG('H', 'a', 'n', 'g'):
68            subScriptBits = kHangulFlag;
69            break;
70        case SCRIPT_TAG('H', 'a', 'n', 'b'):
71            // Bopomofo is almost exclusively used in Taiwan.
72            subScriptBits = kHanFlag | kBopomofoFlag;
73            break;
74        case SCRIPT_TAG('H', 'a', 'n', 'i'):
75            subScriptBits = kHanFlag;
76            break;
77        case SCRIPT_TAG('H', 'a', 'n', 's'):
78            subScriptBits = kHanFlag | kSimplifiedChineseFlag;
79            break;
80        case SCRIPT_TAG('H', 'a', 'n', 't'):
81            subScriptBits = kHanFlag | kTraditionalChineseFlag;
82            break;
83        case SCRIPT_TAG('H', 'i', 'r', 'a'):
84            subScriptBits = kHiraganaFlag;
85            break;
86        case SCRIPT_TAG('H', 'r', 'k', 't'):
87            subScriptBits = kKatakanaFlag | kHiraganaFlag;
88            break;
89        case SCRIPT_TAG('J', 'p', 'a', 'n'):
90            subScriptBits = kHanFlag | kKatakanaFlag | kHiraganaFlag;
91            break;
92        case SCRIPT_TAG('K', 'a', 'n', 'a'):
93            subScriptBits = kKatakanaFlag;
94            break;
95        case SCRIPT_TAG('K', 'o', 'r', 'e'):
96            subScriptBits = kHanFlag | kHangulFlag;
97            break;
98        case SCRIPT_TAG('Z', 's', 'y', 'e'):
99            subScriptBits = kEmojiFlag;
100            break;
101    }
102    return subScriptBits;
103}
104
105std::string FontLanguage::getString() const {
106    if (mLanguage == 0ul) {
107        return "und";
108    }
109    char buf[16];
110    size_t i = 0;
111    buf[i++] = mLanguage & 0xFF ;
112    buf[i++] = (mLanguage >> 8) & 0xFF;
113    char third_letter = (mLanguage >> 16) & 0xFF;
114    if (third_letter != 0) buf[i++] = third_letter;
115    if (mScript != 0) {
116      buf[i++] = '-';
117      buf[i++] = (mScript >> 24) & 0xFFu;
118      buf[i++] = (mScript >> 16) & 0xFFu;
119      buf[i++] = (mScript >> 8) & 0xFFu;
120      buf[i++] = mScript & 0xFFu;
121    }
122    return std::string(buf, i);
123}
124
125bool FontLanguage::isEqualScript(const FontLanguage& other) const {
126    return other.mScript == mScript;
127}
128
129bool FontLanguage::supportsScript(uint8_t requestedBits) const {
130    return requestedBits != 0 && (mSubScriptBits & requestedBits) == requestedBits;
131}
132
133bool FontLanguage::supportsHbScript(hb_script_t script) const {
134    static_assert(SCRIPT_TAG('J', 'p', 'a', 'n') == HB_TAG('J', 'p', 'a', 'n'),
135                  "The Minikin script and HarfBuzz hb_script_t have different encodings.");
136    if (script == mScript) return true;
137    return supportsScript(scriptToSubScriptBits(script));
138}
139
140int FontLanguage::getScoreFor(const FontLanguage other) const {
141    if (isUnsupported() || other.isUnsupported()) {
142        return 0;
143    } else if (isEqualScript(other) || supportsScript(other.mSubScriptBits)) {
144        return mLanguage == other.mLanguage ? 2 : 1;
145    } else {
146        return 0;
147    }
148}
149
150#undef SCRIPT_TAG
151}  // namespace android
152