Locale.cpp revision 22462be7358f1facbe0c0074f8e58a41c2314b6e
1198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka/*
2198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka * Copyright (C) 2015 The Android Open Source Project
3198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka *
4198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka * Licensed under the Apache License, Version 2.0 (the "License");
5198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka * you may not use this file except in compliance with the License.
6198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka * You may obtain a copy of the License at
7198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka *
8198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka *      http://www.apache.org/licenses/LICENSE-2.0
9198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka *
10198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka * Unless required by applicable law or agreed to in writing, software
11198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka * distributed under the License is distributed on an "AS IS" BASIS,
12198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka * See the License for the specific language governing permissions and
14198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka * limitations under the License.
15198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka */
16198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka
17198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka#define LOG_TAG "Minikin"
18198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka
19198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka#include "FontLanguage.h"
20198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka
21e1d7f6168a0a485ecac75cfc9ae3bdc5143d0fb1yirui#include <algorithm>
22198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka#include <hb.h>
23e1d7f6168a0a485ecac75cfc9ae3bdc5143d0fb1yirui#include <string.h>
24198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka#include <unicode/uloc.h>
25198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka
2614e2d136aaef271ba131f917cf5f27baa31ae5adSeigo Nonakanamespace minikin {
27198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka
// Packs a four-character script code into one 32-bit tag. The first character occupies the most
// significant byte and the fourth character the least significant byte.
#define SCRIPT_TAG(c1, c2, c3, c4)                                            \
        ((static_cast<uint32_t>(c1) << 24) | (static_cast<uint32_t>(c2) << 16) | \
         (static_cast<uint32_t>(c3) << 8) | static_cast<uint32_t>(c4))
31198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka
// Returns true when |buf| (|bufLen| characters long) starts with |subtag| and the match ends on a
// subtag boundary, i.e. at the end of the buffer or right before a NUL, '-' or '_' character.
static bool isEmojiSubtag(const char* buf, size_t bufLen, const char* subtag, size_t subtagLen) {
    const bool startsWithSubtag = bufLen >= subtagLen && strncmp(buf, subtag, subtagLen) == 0;
    if (!startsWithSubtag) {
        return false;  // buffer too short, or the leading characters differ
    }
    if (bufLen == subtagLen) {
        return true;  // nothing follows the subtag
    }
    // Something follows the subtag; it only counts as a match if that is a terminator or a
    // delimiter (otherwise |subtag| is just a prefix of a longer word).
    switch (buf[subtagLen]) {
        case '\0':
        case '-':
        case '_':
            return true;
        default:
            return false;
    }
}
43e1d7f6168a0a485ecac75cfc9ae3bdc5143d0fb1yirui
// Packs a two or three character language/region code into the low 15 bits of a 16-bit integer;
// the highest bit is always 0. Each character is stored as a 5-bit offset from a base character:
// three-character region codes consist of digits (10 possible values per character) and language
// codes consist of lowercase letters (26 possible values), so 5 bits per character are enough.
//
// A two-character code stores the all-ones pattern (0x1f) in the first 5-bit field and its two
// characters, relative to |twoLetterBase|, in the remaining two fields. Any other |length| is
// treated as a three-character code relative to |threeLetterBase|.
static uint16_t packLanguageOrRegion(const char* c, size_t length, uint8_t twoLetterBase,
        uint8_t threeLetterBase) {
    if (length == 2) {
        const uint16_t middle = static_cast<uint16_t>(c[0] - twoLetterBase);
        const uint16_t low = static_cast<uint16_t>(c[1] - twoLetterBase);
        // 0x7c00 == 0x1f << 10: marks the value as a two-character code.
        return static_cast<uint16_t>(0x7c00u | (middle << 5) | low);
    }

    const uint16_t high = static_cast<uint16_t>(c[0] - threeLetterBase);
    const uint16_t middle = static_cast<uint16_t>(c[1] - threeLetterBase);
    const uint16_t low = static_cast<uint16_t>(c[2] - threeLetterBase);
    return static_cast<uint16_t>((high << 10) | (middle << 5) | low);
}
6322462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka
// Reverses packLanguageOrRegion(): expands the three 5-bit fields of |in| into characters written
// to |out| and returns how many characters were written (2 or 3). When the first field is 0x1f
// the value is a two-character code decoded relative to |twoLetterBase|; otherwise all three
// fields are decoded relative to |threeLetterBase|. |out| is not NUL-terminated.
static size_t unpackLanguageOrRegion(uint16_t in, char* out, uint8_t twoLetterBase,
        uint8_t threeLetterBase) {
    const uint8_t kFieldMask = 0x1f;
    const uint8_t high = static_cast<uint8_t>((in >> 10) & kFieldMask);
    const uint8_t middle = static_cast<uint8_t>((in >> 5) & kFieldMask);
    const uint8_t low = static_cast<uint8_t>(in & kFieldMask);

    const bool isTwoLetterCode = (high == kFieldMask);
    const uint8_t base = isTwoLetterCode ? twoLetterBase : threeLetterBase;

    size_t written = 0;
    if (!isTwoLetterCode) {
        // Only three-character codes carry a real character in the first field.
        out[written++] = static_cast<char>(high + base);
    }
    out[written++] = static_cast<char>(middle + base);
    out[written++] = static_cast<char>(low + base);
    return written;
}
8122462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka
// Scans |buffer| from |startOffset| for the first '-' or '_' and returns its index. Returns
// |bufferLength| when no delimiter is found (including when |startOffset| is already at or past
// the end of the buffer).
static size_t nextDelimiterIndex(const char* buffer, size_t bufferLength, size_t startOffset) {
    size_t index = startOffset;
    while (index < bufferLength) {
        const char current = buffer[index];
        if (current == '-' || current == '_') {
            return index;
        }
        ++index;
    }
    return bufferLength;
}
9122462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka
// Returns true if |c| lies in the range 'a'..'z'.
static inline bool isLowercase(char c) {
    // A value below 'a' wraps to a large unsigned number, so one comparison covers both bounds.
    return static_cast<unsigned>(c - 'a') <= static_cast<unsigned>('z' - 'a');
}
9522462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka
// Returns true if |c| lies in the range 'A'..'Z'.
static inline bool isUppercase(char c) {
    // A value below 'A' wraps to a large unsigned number, so one comparison covers both bounds.
    return static_cast<unsigned>(c - 'A') <= static_cast<unsigned>('Z' - 'A');
}
9922462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka
// Returns true if |c| lies in the range '0'..'9'.
static inline bool isDigit(char c) {
    // A value below '0' wraps to a large unsigned number, so one comparison covers both bounds.
    return static_cast<unsigned>(c - '0') <= static_cast<unsigned>('9' - '0');
}
10322462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka
10422462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka// Returns true if the buffer is valid for language code.
10522462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonakastatic inline bool isValidLanguageCode(const char* buffer, size_t length) {
10622462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka    if (length != 2 && length != 3) return false;
10722462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka    if (!isLowercase(buffer[0])) return false;
10822462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka    if (!isLowercase(buffer[1])) return false;
10922462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka    if (length == 3 && !isLowercase(buffer[2])) return false;
11022462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka    return true;
11122462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka}
11222462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka
11322462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka// Returns true if buffer is valid for script code. The length of buffer must be 4.
11422462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonakastatic inline bool isValidScriptCode(const char* buffer) {
11522462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka    return isUppercase(buffer[0]) && isLowercase(buffer[1]) && isLowercase(buffer[2]) &&
11622462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka        isLowercase(buffer[3]);
11722462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka}
11822462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka
11922462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka// Returns true if the buffer is valid for region code.
12022462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonakastatic inline bool isValidRegionCode(const char* buffer, size_t length) {
12122462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka    return (length == 2 && isUppercase(buffer[0]) && isUppercase(buffer[1])) ||
12222462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka            (length == 3 && isDigit(buffer[0]) && isDigit(buffer[1]) && isDigit(buffer[2]));
12322462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka}
12422462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka
125198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka// Parse BCP 47 language identifier into internal structure
126198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo NonakaFontLanguage::FontLanguage(const char* buf, size_t length) : FontLanguage() {
12722462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka    size_t firstDelimiterPos = nextDelimiterIndex(buf, length, 0);
12822462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka    if (isValidLanguageCode(buf, firstDelimiterPos)) {
12922462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka        mLanguage = packLanguageOrRegion(buf, firstDelimiterPos, 'a', 'a');
130198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    } else {
131198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka        // We don't understand anything other than two-letter or three-letter
132198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka        // language codes, so we skip parsing the rest of the string.
133198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka        return;
134198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    }
135198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka
13622462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka    if (firstDelimiterPos == length) {
13722462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka        mHbLanguage = hb_language_from_string(getString().c_str(), -1);
13822462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka        return;  // Language code only.
13922462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka    }
14022462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka
14122462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka    size_t nextComponentStartPos = firstDelimiterPos + 1;
14222462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka    size_t nextDelimiterPos = nextDelimiterIndex(buf, length, nextComponentStartPos);
14322462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka    size_t componentLength = nextDelimiterPos - nextComponentStartPos;
14422462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka
14522462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka    if (componentLength == 4) {
14622462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka        // Possibly script code.
14722462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka        const char* p = buf + nextComponentStartPos;
14822462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka        if (isValidScriptCode(p)) {
14922462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka            mScript = SCRIPT_TAG(p[0], p[1], p[2], p[3]);
15022462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka            mSubScriptBits = scriptToSubScriptBits(mScript);
151198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka        }
15222462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka
15322462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka        if (nextDelimiterPos == length) {
15422462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka            mHbLanguage = hb_language_from_string(getString().c_str(), -1);
15522462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka            mEmojiStyle = resolveEmojiStyle(buf, length, mScript);
15622462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka            return;  // No region code.
157198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka        }
15822462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka
15922462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka        nextComponentStartPos = nextDelimiterPos + 1;
16022462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka        nextDelimiterPos = nextDelimiterIndex(buf, length, nextComponentStartPos);
16122462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka        componentLength = nextDelimiterPos - nextComponentStartPos;
162198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    }
163e1d7f6168a0a485ecac75cfc9ae3bdc5143d0fb1yirui
16422462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka    if (componentLength == 2 || componentLength == 3) {
16522462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka        // Possibly region code.
16622462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka        const char* p = buf + nextComponentStartPos;
16722462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka        if (isValidRegionCode(p, componentLength)) {
16822462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka            mRegion = packLanguageOrRegion(p, componentLength, 'A', '0');
16922462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka        }
170e1d7f6168a0a485ecac75cfc9ae3bdc5143d0fb1yirui    }
17122462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka
17222462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka    mHbLanguage = hb_language_from_string(getString().c_str(), -1);
17322462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka    mEmojiStyle = resolveEmojiStyle(buf, length, mScript);
17422462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka}
17522462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka
17622462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka// static
17722462be7358f1facbe0c0074f8e58a41c2314b6eSeigo NonakaFontLanguage::EmojiStyle FontLanguage::resolveEmojiStyle(const char* buf, size_t length,
17822462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka        uint32_t script) {
17922462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka    // First, lookup emoji subtag.
180e1d7f6168a0a485ecac75cfc9ae3bdc5143d0fb1yirui    // 10 is the length of "-u-em-text", which is the shortest emoji subtag,
181e1d7f6168a0a485ecac75cfc9ae3bdc5143d0fb1yirui    // unnecessary comparison can be avoided if total length is smaller than 10.
182e1d7f6168a0a485ecac75cfc9ae3bdc5143d0fb1yirui    const size_t kMinSubtagLength = 10;
18322462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka    if (length >= kMinSubtagLength) {
18422462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka        static const char kPrefix[] = "-u-em-";
18522462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka        const char *pos = std::search(buf, buf + length, kPrefix, kPrefix + strlen(kPrefix));
18622462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka        if (pos != buf + length) {  // found
18722462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka            pos += strlen(kPrefix);
18822462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka            const size_t remainingLength = length - (pos - buf);
18922462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka            if (isEmojiSubtag(pos, remainingLength, "emoji", 5)){
19022462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka                return EMSTYLE_EMOJI;
19122462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka            } else if (isEmojiSubtag(pos, remainingLength, "text", 4)){
19222462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka                return EMSTYLE_TEXT;
19322462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka            } else if (isEmojiSubtag(pos, remainingLength, "default", 7)){
19422462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka                return EMSTYLE_DEFAULT;
19522462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka            }
19622462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka        }
197e1d7f6168a0a485ecac75cfc9ae3bdc5143d0fb1yirui    }
198e1d7f6168a0a485ecac75cfc9ae3bdc5143d0fb1yirui
19922462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka    // If no emoji subtag was provided, resolve the emoji style from script code.
20022462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka    if (script == SCRIPT_TAG('Z', 's', 'y', 'e')) {
20122462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka        return EMSTYLE_EMOJI;
20222462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka    } else if (script == SCRIPT_TAG('Z', 's', 'y', 'm')) {
20322462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka        return EMSTYLE_TEXT;
204e1d7f6168a0a485ecac75cfc9ae3bdc5143d0fb1yirui    }
20522462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka
20622462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka    return EMSTYLE_EMPTY;
207198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka}
208198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka
209198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka//static
210198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonakauint8_t FontLanguage::scriptToSubScriptBits(uint32_t script) {
211198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    uint8_t subScriptBits = 0u;
212198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    switch (script) {
213533a01ea8438bb102b0dbc71f6c4ef356b260ed5Seigo Nonaka        case SCRIPT_TAG('B', 'o', 'p', 'o'):
214533a01ea8438bb102b0dbc71f6c4ef356b260ed5Seigo Nonaka            subScriptBits = kBopomofoFlag;
215533a01ea8438bb102b0dbc71f6c4ef356b260ed5Seigo Nonaka            break;
216198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka        case SCRIPT_TAG('H', 'a', 'n', 'g'):
217198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka            subScriptBits = kHangulFlag;
218198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka            break;
219533a01ea8438bb102b0dbc71f6c4ef356b260ed5Seigo Nonaka        case SCRIPT_TAG('H', 'a', 'n', 'b'):
220533a01ea8438bb102b0dbc71f6c4ef356b260ed5Seigo Nonaka            // Bopomofo is almost exclusively used in Taiwan.
221533a01ea8438bb102b0dbc71f6c4ef356b260ed5Seigo Nonaka            subScriptBits = kHanFlag | kBopomofoFlag;
222533a01ea8438bb102b0dbc71f6c4ef356b260ed5Seigo Nonaka            break;
223198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka        case SCRIPT_TAG('H', 'a', 'n', 'i'):
224198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka            subScriptBits = kHanFlag;
225198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka            break;
226198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka        case SCRIPT_TAG('H', 'a', 'n', 's'):
227198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka            subScriptBits = kHanFlag | kSimplifiedChineseFlag;
228198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka            break;
229198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka        case SCRIPT_TAG('H', 'a', 'n', 't'):
230198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka            subScriptBits = kHanFlag | kTraditionalChineseFlag;
231198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka            break;
232198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka        case SCRIPT_TAG('H', 'i', 'r', 'a'):
233198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka            subScriptBits = kHiraganaFlag;
234198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka            break;
235198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka        case SCRIPT_TAG('H', 'r', 'k', 't'):
236198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka            subScriptBits = kKatakanaFlag | kHiraganaFlag;
237198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka            break;
238198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka        case SCRIPT_TAG('J', 'p', 'a', 'n'):
239198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka            subScriptBits = kHanFlag | kKatakanaFlag | kHiraganaFlag;
240198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka            break;
241198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka        case SCRIPT_TAG('K', 'a', 'n', 'a'):
242198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka            subScriptBits = kKatakanaFlag;
243198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka            break;
244198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka        case SCRIPT_TAG('K', 'o', 'r', 'e'):
245198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka            subScriptBits = kHanFlag | kHangulFlag;
246198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka            break;
247198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    }
248198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    return subScriptBits;
249198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka}
250198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka
251198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonakastd::string FontLanguage::getString() const {
25222462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka    if (isUnsupported()) {
253198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka        return "und";
254198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    }
255198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    char buf[16];
25622462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka    size_t i = unpackLanguageOrRegion(mLanguage, buf, 'a', 'a');
257198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    if (mScript != 0) {
25822462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka        buf[i++] = '-';
25922462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka        buf[i++] = (mScript >> 24) & 0xFFu;
26022462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka        buf[i++] = (mScript >> 16) & 0xFFu;
26122462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka        buf[i++] = (mScript >> 8) & 0xFFu;
26222462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka        buf[i++] = mScript & 0xFFu;
26322462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka    }
26422462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka    if (mRegion != INVALID_CODE) {
26522462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka        buf[i++] = '-';
26622462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka        i += unpackLanguageOrRegion(mRegion, buf + i, 'A', '0');
267198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    }
268198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    return std::string(buf, i);
269198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka}
270198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka
2716f9966ea7c1910fd780cf7779cc59701c9b98a2bSeigo Nonakabool FontLanguage::isEqualScript(const FontLanguage& other) const {
272198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    return other.mScript == mScript;
273198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka}
274198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka
275f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka// static
276f3afe92def0fff022889fd036d68451223aac146Seigo Nonakabool FontLanguage::supportsScript(uint8_t providedBits, uint8_t requestedBits) {
277f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka    return requestedBits != 0 && (providedBits & requestedBits) == requestedBits;
2786f9966ea7c1910fd780cf7779cc59701c9b98a2bSeigo Nonaka}
2796f9966ea7c1910fd780cf7779cc59701c9b98a2bSeigo Nonaka
280198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonakabool FontLanguage::supportsHbScript(hb_script_t script) const {
281198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    static_assert(SCRIPT_TAG('J', 'p', 'a', 'n') == HB_TAG('J', 'p', 'a', 'n'),
282198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka                  "The Minikin script and HarfBuzz hb_script_t have different encodings.");
283198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka    if (script == mScript) return true;
284f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka    return supportsScript(mSubScriptBits, scriptToSubScriptBits(script));
285198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka}
286198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka
287f3afe92def0fff022889fd036d68451223aac146Seigo Nonakaint FontLanguage::calcScoreFor(const FontLanguages& supported) const {
288d478da324d3b98e736b6c42415c02d506cfb653fYirui Huang    bool languageScriptMatch = false;
289d478da324d3b98e736b6c42415c02d506cfb653fYirui Huang    bool subtagMatch = false;
290d478da324d3b98e736b6c42415c02d506cfb653fYirui Huang    bool scriptMatch = false;
291d478da324d3b98e736b6c42415c02d506cfb653fYirui Huang
292f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka    for (size_t i = 0; i < supported.size(); ++i) {
293d478da324d3b98e736b6c42415c02d506cfb653fYirui Huang        if (mEmojiStyle != EMSTYLE_EMPTY &&
294d478da324d3b98e736b6c42415c02d506cfb653fYirui Huang               mEmojiStyle == supported[i].mEmojiStyle) {
295d478da324d3b98e736b6c42415c02d506cfb653fYirui Huang            subtagMatch = true;
296d478da324d3b98e736b6c42415c02d506cfb653fYirui Huang            if (mLanguage == supported[i].mLanguage) {
297d478da324d3b98e736b6c42415c02d506cfb653fYirui Huang                return 4;
298d478da324d3b98e736b6c42415c02d506cfb653fYirui Huang            }
299d478da324d3b98e736b6c42415c02d506cfb653fYirui Huang        }
300f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka        if (isEqualScript(supported[i]) ||
301f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka                supportsScript(supported[i].mSubScriptBits, mSubScriptBits)) {
302d478da324d3b98e736b6c42415c02d506cfb653fYirui Huang            scriptMatch = true;
303f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka            if (mLanguage == supported[i].mLanguage) {
304d478da324d3b98e736b6c42415c02d506cfb653fYirui Huang                languageScriptMatch = true;
305f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka            }
306f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka        }
307f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka    }
308f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka
309f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka    if (supportsScript(supported.getUnionOfSubScriptBits(), mSubScriptBits)) {
310d478da324d3b98e736b6c42415c02d506cfb653fYirui Huang        scriptMatch = true;
311d478da324d3b98e736b6c42415c02d506cfb653fYirui Huang        if (mLanguage == supported[0].mLanguage && supported.isAllTheSameLanguage()) {
312d478da324d3b98e736b6c42415c02d506cfb653fYirui Huang            return 3;
313d478da324d3b98e736b6c42415c02d506cfb653fYirui Huang        }
314f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka    }
315f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka
316d478da324d3b98e736b6c42415c02d506cfb653fYirui Huang    if (languageScriptMatch) {
317d478da324d3b98e736b6c42415c02d506cfb653fYirui Huang        return 3;
318d478da324d3b98e736b6c42415c02d506cfb653fYirui Huang    } else if (subtagMatch) {
319d478da324d3b98e736b6c42415c02d506cfb653fYirui Huang        return 2;
320d478da324d3b98e736b6c42415c02d506cfb653fYirui Huang    } else if (scriptMatch) {
321d478da324d3b98e736b6c42415c02d506cfb653fYirui Huang        return 1;
322d478da324d3b98e736b6c42415c02d506cfb653fYirui Huang    }
323f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka    return 0;
324f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka}
325f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka
326f3afe92def0fff022889fd036d68451223aac146Seigo NonakaFontLanguages::FontLanguages(std::vector<FontLanguage>&& languages)
327f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka    : mLanguages(std::move(languages)) {
328f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka    if (mLanguages.empty()) {
329f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka        return;
330f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka    }
331f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka
332f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka    const FontLanguage& lang = mLanguages[0];
333f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka
334f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka    mIsAllTheSameLanguage = true;
335f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka    mUnionOfSubScriptBits = lang.mSubScriptBits;
336f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka    for (size_t i = 1; i < mLanguages.size(); ++i) {
337f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka        mUnionOfSubScriptBits |= mLanguages[i].mSubScriptBits;
338f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka        if (mIsAllTheSameLanguage && lang.mLanguage != mLanguages[i].mLanguage) {
339f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka            mIsAllTheSameLanguage = false;
340f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka        }
3416f9966ea7c1910fd780cf7779cc59701c9b98a2bSeigo Nonaka    }
342198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka}
343198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka
344198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka#undef SCRIPT_TAG
34514e2d136aaef271ba131f917cf5f27baa31ae5adSeigo Nonaka}  // namespace minikin
346