// Locale.cpp revision 22462be7358f1facbe0c0074f8e58a41c2314b6e
1198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka/* 2198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka * Copyright (C) 2015 The Android Open Source Project 3198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka * 4198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka * Licensed under the Apache License, Version 2.0 (the "License"); 5198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka * you may not use this file except in compliance with the License. 6198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka * You may obtain a copy of the License at 7198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka * 8198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka * http://www.apache.org/licenses/LICENSE-2.0 9198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka * 10198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka * Unless required by applicable law or agreed to in writing, software 11198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka * distributed under the License is distributed on an "AS IS" BASIS, 12198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka * See the License for the specific language governing permissions and 14198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka * limitations under the License. 
15198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka */ 16198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka 17198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka#define LOG_TAG "Minikin" 18198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka 19198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka#include "FontLanguage.h" 20198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka 21e1d7f6168a0a485ecac75cfc9ae3bdc5143d0fb1yirui#include <algorithm> 22198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka#include <hb.h> 23e1d7f6168a0a485ecac75cfc9ae3bdc5143d0fb1yirui#include <string.h> 24198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka#include <unicode/uloc.h> 25198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka 2614e2d136aaef271ba131f917cf5f27baa31ae5adSeigo Nonakanamespace minikin { 27198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka 28198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka#define SCRIPT_TAG(c1, c2, c3, c4) \ 29198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka (((uint32_t)(c1)) << 24 | ((uint32_t)(c2)) << 16 | ((uint32_t)(c3)) << 8 | \ 30198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka ((uint32_t)(c4))) 31198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka 32e1d7f6168a0a485ecac75cfc9ae3bdc5143d0fb1yirui// Check if a language code supports emoji according to its subtag 33e1d7f6168a0a485ecac75cfc9ae3bdc5143d0fb1yiruistatic bool isEmojiSubtag(const char* buf, size_t bufLen, const char* subtag, size_t subtagLen) { 34e1d7f6168a0a485ecac75cfc9ae3bdc5143d0fb1yirui if (bufLen < subtagLen) { 35e1d7f6168a0a485ecac75cfc9ae3bdc5143d0fb1yirui return false; 36e1d7f6168a0a485ecac75cfc9ae3bdc5143d0fb1yirui } 37e1d7f6168a0a485ecac75cfc9ae3bdc5143d0fb1yirui if (strncmp(buf, subtag, subtagLen) != 0) { 38e1d7f6168a0a485ecac75cfc9ae3bdc5143d0fb1yirui return false; // no match between two strings 39e1d7f6168a0a485ecac75cfc9ae3bdc5143d0fb1yirui } 40e1d7f6168a0a485ecac75cfc9ae3bdc5143d0fb1yirui return (bufLen == subtagLen || buf[subtagLen] == '\0' || 
41e1d7f6168a0a485ecac75cfc9ae3bdc5143d0fb1yirui buf[subtagLen] == '-' || buf[subtagLen] == '_'); 42e1d7f6168a0a485ecac75cfc9ae3bdc5143d0fb1yirui} 43e1d7f6168a0a485ecac75cfc9ae3bdc5143d0fb1yirui 4422462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka// Pack the three letter code into 15 bits and stored to 16 bit integer. The highest bit is 0. 4522462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka// For the region code, the letters must be all digits in three letter case, so the number of 4622462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka// possible values are 10. For the language code, the letters must be all small alphabets, so the 4722462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka// number of possible values are 26. Thus, 5 bits are sufficient for each case and we can pack the 4822462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka// three letter language code or region code to 15 bits. 4922462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka// 5022462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka// In case of two letter code, use fullbit(0x1f) for the first letter instead. 
static uint16_t packLanguageOrRegion(const char* c, size_t length, uint8_t twoLetterBase,
        uint8_t threeLetterBase) {
    // Each letter is stored as its offset from the base character in a 5-bit slot.
    // The caller is expected to have validated the code; any length other than 2 is
    // treated as a three-letter code.
    if (length == 2) {
        return 0x7c00u |  // 0x1fu << 10, marks a two-letter code
                (static_cast<uint16_t>(c[0] - twoLetterBase) << 5) |
                static_cast<uint16_t>(c[1] - twoLetterBase);
    } else {
        return (static_cast<uint16_t>(c[0] - threeLetterBase) << 10) |
                (static_cast<uint16_t>(c[1] - threeLetterBase) << 5) |
                static_cast<uint16_t>(c[2] - threeLetterBase);
    }
}

// Reverses packLanguageOrRegion: writes the two or three letters into out (no NUL terminator is
// written) and returns the number of characters written.
static size_t unpackLanguageOrRegion(uint16_t in, char* out, uint8_t twoLetterBase,
        uint8_t threeLetterBase) {
    const uint8_t first = (in >> 10) & 0x1f;
    const uint8_t second = (in >> 5) & 0x1f;
    const uint8_t third = in & 0x1f;

    if (first == 0x1f) {
        // The first slot holds the two-letter marker, so only two letters are stored.
        out[0] = second + twoLetterBase;
        out[1] = third + twoLetterBase;
        return 2;
    } else {
        out[0] = first + threeLetterBase;
        out[1] = second + threeLetterBase;
        out[2] = third + threeLetterBase;
        return 3;
    }
}

// Find the next '-' or '_' index from startOffset position. If not found, returns bufferLength.
static size_t nextDelimiterIndex(const char* buffer, size_t bufferLength, size_t startOffset) {
    for (size_t i = startOffset; i < bufferLength; ++i) {
        if (buffer[i] == '-' || buffer[i] == '_') {
            return i;
        }
    }
    return bufferLength;
}

// Returns true if c is in the range 'a'..'z'.
static inline bool isLowercase(char c) {
    return 'a' <= c && c <= 'z';
}

// Returns true if c is in the range 'A'..'Z'.
static inline bool isUppercase(char c) {
    return 'A' <= c && c <= 'Z';
}
10022462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonakastatic inline bool isDigit(char c) { 10122462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka return '0' <= c && c <= '9'; 10222462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka} 10322462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka 10422462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka// Returns true if the buffer is valid for language code. 10522462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonakastatic inline bool isValidLanguageCode(const char* buffer, size_t length) { 10622462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka if (length != 2 && length != 3) return false; 10722462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka if (!isLowercase(buffer[0])) return false; 10822462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka if (!isLowercase(buffer[1])) return false; 10922462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka if (length == 3 && !isLowercase(buffer[2])) return false; 11022462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka return true; 11122462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka} 11222462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka 11322462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka// Returns true if buffer is valid for script code. The length of buffer must be 4. 11422462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonakastatic inline bool isValidScriptCode(const char* buffer) { 11522462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka return isUppercase(buffer[0]) && isLowercase(buffer[1]) && isLowercase(buffer[2]) && 11622462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka isLowercase(buffer[3]); 11722462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka} 11822462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka 11922462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka// Returns true if the buffer is valid for region code. 
12022462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonakastatic inline bool isValidRegionCode(const char* buffer, size_t length) { 12122462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka return (length == 2 && isUppercase(buffer[0]) && isUppercase(buffer[1])) || 12222462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka (length == 3 && isDigit(buffer[0]) && isDigit(buffer[1]) && isDigit(buffer[2])); 12322462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka} 12422462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka 125198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka// Parse BCP 47 language identifier into internal structure 126198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo NonakaFontLanguage::FontLanguage(const char* buf, size_t length) : FontLanguage() { 12722462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka size_t firstDelimiterPos = nextDelimiterIndex(buf, length, 0); 12822462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka if (isValidLanguageCode(buf, firstDelimiterPos)) { 12922462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka mLanguage = packLanguageOrRegion(buf, firstDelimiterPos, 'a', 'a'); 130198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka } else { 131198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka // We don't understand anything other than two-letter or three-letter 132198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka // language codes, so we skip parsing the rest of the string. 133198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka return; 134198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka } 135198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka 13622462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka if (firstDelimiterPos == length) { 13722462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka mHbLanguage = hb_language_from_string(getString().c_str(), -1); 13822462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka return; // Language code only. 
13922462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka } 14022462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka 14122462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka size_t nextComponentStartPos = firstDelimiterPos + 1; 14222462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka size_t nextDelimiterPos = nextDelimiterIndex(buf, length, nextComponentStartPos); 14322462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka size_t componentLength = nextDelimiterPos - nextComponentStartPos; 14422462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka 14522462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka if (componentLength == 4) { 14622462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka // Possibly script code. 14722462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka const char* p = buf + nextComponentStartPos; 14822462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka if (isValidScriptCode(p)) { 14922462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka mScript = SCRIPT_TAG(p[0], p[1], p[2], p[3]); 15022462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka mSubScriptBits = scriptToSubScriptBits(mScript); 151198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka } 15222462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka 15322462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka if (nextDelimiterPos == length) { 15422462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka mHbLanguage = hb_language_from_string(getString().c_str(), -1); 15522462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka mEmojiStyle = resolveEmojiStyle(buf, length, mScript); 15622462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka return; // No region code. 
157198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka } 15822462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka 15922462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka nextComponentStartPos = nextDelimiterPos + 1; 16022462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka nextDelimiterPos = nextDelimiterIndex(buf, length, nextComponentStartPos); 16122462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka componentLength = nextDelimiterPos - nextComponentStartPos; 162198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka } 163e1d7f6168a0a485ecac75cfc9ae3bdc5143d0fb1yirui 16422462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka if (componentLength == 2 || componentLength == 3) { 16522462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka // Possibly region code. 16622462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka const char* p = buf + nextComponentStartPos; 16722462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka if (isValidRegionCode(p, componentLength)) { 16822462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka mRegion = packLanguageOrRegion(p, componentLength, 'A', '0'); 16922462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka } 170e1d7f6168a0a485ecac75cfc9ae3bdc5143d0fb1yirui } 17122462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka 17222462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka mHbLanguage = hb_language_from_string(getString().c_str(), -1); 17322462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka mEmojiStyle = resolveEmojiStyle(buf, length, mScript); 17422462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka} 17522462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka 17622462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka// static 17722462be7358f1facbe0c0074f8e58a41c2314b6eSeigo NonakaFontLanguage::EmojiStyle FontLanguage::resolveEmojiStyle(const char* buf, size_t length, 17822462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka uint32_t script) { 17922462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka // First, lookup emoji subtag. 
180e1d7f6168a0a485ecac75cfc9ae3bdc5143d0fb1yirui // 10 is the length of "-u-em-text", which is the shortest emoji subtag, 181e1d7f6168a0a485ecac75cfc9ae3bdc5143d0fb1yirui // unnecessary comparison can be avoided if total length is smaller than 10. 182e1d7f6168a0a485ecac75cfc9ae3bdc5143d0fb1yirui const size_t kMinSubtagLength = 10; 18322462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka if (length >= kMinSubtagLength) { 18422462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka static const char kPrefix[] = "-u-em-"; 18522462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka const char *pos = std::search(buf, buf + length, kPrefix, kPrefix + strlen(kPrefix)); 18622462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka if (pos != buf + length) { // found 18722462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka pos += strlen(kPrefix); 18822462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka const size_t remainingLength = length - (pos - buf); 18922462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka if (isEmojiSubtag(pos, remainingLength, "emoji", 5)){ 19022462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka return EMSTYLE_EMOJI; 19122462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka } else if (isEmojiSubtag(pos, remainingLength, "text", 4)){ 19222462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka return EMSTYLE_TEXT; 19322462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka } else if (isEmojiSubtag(pos, remainingLength, "default", 7)){ 19422462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka return EMSTYLE_DEFAULT; 19522462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka } 19622462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka } 197e1d7f6168a0a485ecac75cfc9ae3bdc5143d0fb1yirui } 198e1d7f6168a0a485ecac75cfc9ae3bdc5143d0fb1yirui 19922462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka // If no emoji subtag was provided, resolve the emoji style from script code. 
20022462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka if (script == SCRIPT_TAG('Z', 's', 'y', 'e')) { 20122462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka return EMSTYLE_EMOJI; 20222462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka } else if (script == SCRIPT_TAG('Z', 's', 'y', 'm')) { 20322462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka return EMSTYLE_TEXT; 204e1d7f6168a0a485ecac75cfc9ae3bdc5143d0fb1yirui } 20522462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka 20622462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka return EMSTYLE_EMPTY; 207198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka} 208198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka 209198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka//static 210198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonakauint8_t FontLanguage::scriptToSubScriptBits(uint32_t script) { 211198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka uint8_t subScriptBits = 0u; 212198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka switch (script) { 213533a01ea8438bb102b0dbc71f6c4ef356b260ed5Seigo Nonaka case SCRIPT_TAG('B', 'o', 'p', 'o'): 214533a01ea8438bb102b0dbc71f6c4ef356b260ed5Seigo Nonaka subScriptBits = kBopomofoFlag; 215533a01ea8438bb102b0dbc71f6c4ef356b260ed5Seigo Nonaka break; 216198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka case SCRIPT_TAG('H', 'a', 'n', 'g'): 217198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka subScriptBits = kHangulFlag; 218198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka break; 219533a01ea8438bb102b0dbc71f6c4ef356b260ed5Seigo Nonaka case SCRIPT_TAG('H', 'a', 'n', 'b'): 220533a01ea8438bb102b0dbc71f6c4ef356b260ed5Seigo Nonaka // Bopomofo is almost exclusively used in Taiwan. 
221533a01ea8438bb102b0dbc71f6c4ef356b260ed5Seigo Nonaka subScriptBits = kHanFlag | kBopomofoFlag; 222533a01ea8438bb102b0dbc71f6c4ef356b260ed5Seigo Nonaka break; 223198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka case SCRIPT_TAG('H', 'a', 'n', 'i'): 224198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka subScriptBits = kHanFlag; 225198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka break; 226198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka case SCRIPT_TAG('H', 'a', 'n', 's'): 227198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka subScriptBits = kHanFlag | kSimplifiedChineseFlag; 228198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka break; 229198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka case SCRIPT_TAG('H', 'a', 'n', 't'): 230198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka subScriptBits = kHanFlag | kTraditionalChineseFlag; 231198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka break; 232198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka case SCRIPT_TAG('H', 'i', 'r', 'a'): 233198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka subScriptBits = kHiraganaFlag; 234198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka break; 235198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka case SCRIPT_TAG('H', 'r', 'k', 't'): 236198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka subScriptBits = kKatakanaFlag | kHiraganaFlag; 237198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka break; 238198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka case SCRIPT_TAG('J', 'p', 'a', 'n'): 239198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka subScriptBits = kHanFlag | kKatakanaFlag | kHiraganaFlag; 240198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka break; 241198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka case SCRIPT_TAG('K', 'a', 'n', 'a'): 242198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka subScriptBits = kKatakanaFlag; 243198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka break; 244198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka 
case SCRIPT_TAG('K', 'o', 'r', 'e'): 245198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka subScriptBits = kHanFlag | kHangulFlag; 246198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka break; 247198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka } 248198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka return subScriptBits; 249198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka} 250198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka 251198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonakastd::string FontLanguage::getString() const { 25222462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka if (isUnsupported()) { 253198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka return "und"; 254198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka } 255198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka char buf[16]; 25622462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka size_t i = unpackLanguageOrRegion(mLanguage, buf, 'a', 'a'); 257198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka if (mScript != 0) { 25822462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka buf[i++] = '-'; 25922462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka buf[i++] = (mScript >> 24) & 0xFFu; 26022462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka buf[i++] = (mScript >> 16) & 0xFFu; 26122462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka buf[i++] = (mScript >> 8) & 0xFFu; 26222462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka buf[i++] = mScript & 0xFFu; 26322462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka } 26422462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka if (mRegion != INVALID_CODE) { 26522462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka buf[i++] = '-'; 26622462be7358f1facbe0c0074f8e58a41c2314b6eSeigo Nonaka i += unpackLanguageOrRegion(mRegion, buf + i, 'A', '0'); 267198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka } 268198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka return std::string(buf, i); 269198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka} 
270198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka 2716f9966ea7c1910fd780cf7779cc59701c9b98a2bSeigo Nonakabool FontLanguage::isEqualScript(const FontLanguage& other) const { 272198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka return other.mScript == mScript; 273198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka} 274198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka 275f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka// static 276f3afe92def0fff022889fd036d68451223aac146Seigo Nonakabool FontLanguage::supportsScript(uint8_t providedBits, uint8_t requestedBits) { 277f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka return requestedBits != 0 && (providedBits & requestedBits) == requestedBits; 2786f9966ea7c1910fd780cf7779cc59701c9b98a2bSeigo Nonaka} 2796f9966ea7c1910fd780cf7779cc59701c9b98a2bSeigo Nonaka 280198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonakabool FontLanguage::supportsHbScript(hb_script_t script) const { 281198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka static_assert(SCRIPT_TAG('J', 'p', 'a', 'n') == HB_TAG('J', 'p', 'a', 'n'), 282198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka "The Minikin script and HarfBuzz hb_script_t have different encodings."); 283198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka if (script == mScript) return true; 284f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka return supportsScript(mSubScriptBits, scriptToSubScriptBits(script)); 285198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka} 286198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka 287f3afe92def0fff022889fd036d68451223aac146Seigo Nonakaint FontLanguage::calcScoreFor(const FontLanguages& supported) const { 288d478da324d3b98e736b6c42415c02d506cfb653fYirui Huang bool languageScriptMatch = false; 289d478da324d3b98e736b6c42415c02d506cfb653fYirui Huang bool subtagMatch = false; 290d478da324d3b98e736b6c42415c02d506cfb653fYirui Huang bool scriptMatch = false; 291d478da324d3b98e736b6c42415c02d506cfb653fYirui Huang 
292f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka for (size_t i = 0; i < supported.size(); ++i) { 293d478da324d3b98e736b6c42415c02d506cfb653fYirui Huang if (mEmojiStyle != EMSTYLE_EMPTY && 294d478da324d3b98e736b6c42415c02d506cfb653fYirui Huang mEmojiStyle == supported[i].mEmojiStyle) { 295d478da324d3b98e736b6c42415c02d506cfb653fYirui Huang subtagMatch = true; 296d478da324d3b98e736b6c42415c02d506cfb653fYirui Huang if (mLanguage == supported[i].mLanguage) { 297d478da324d3b98e736b6c42415c02d506cfb653fYirui Huang return 4; 298d478da324d3b98e736b6c42415c02d506cfb653fYirui Huang } 299d478da324d3b98e736b6c42415c02d506cfb653fYirui Huang } 300f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka if (isEqualScript(supported[i]) || 301f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka supportsScript(supported[i].mSubScriptBits, mSubScriptBits)) { 302d478da324d3b98e736b6c42415c02d506cfb653fYirui Huang scriptMatch = true; 303f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka if (mLanguage == supported[i].mLanguage) { 304d478da324d3b98e736b6c42415c02d506cfb653fYirui Huang languageScriptMatch = true; 305f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka } 306f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka } 307f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka } 308f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka 309f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka if (supportsScript(supported.getUnionOfSubScriptBits(), mSubScriptBits)) { 310d478da324d3b98e736b6c42415c02d506cfb653fYirui Huang scriptMatch = true; 311d478da324d3b98e736b6c42415c02d506cfb653fYirui Huang if (mLanguage == supported[0].mLanguage && supported.isAllTheSameLanguage()) { 312d478da324d3b98e736b6c42415c02d506cfb653fYirui Huang return 3; 313d478da324d3b98e736b6c42415c02d506cfb653fYirui Huang } 314f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka } 315f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka 316d478da324d3b98e736b6c42415c02d506cfb653fYirui Huang if (languageScriptMatch) { 
317d478da324d3b98e736b6c42415c02d506cfb653fYirui Huang return 3; 318d478da324d3b98e736b6c42415c02d506cfb653fYirui Huang } else if (subtagMatch) { 319d478da324d3b98e736b6c42415c02d506cfb653fYirui Huang return 2; 320d478da324d3b98e736b6c42415c02d506cfb653fYirui Huang } else if (scriptMatch) { 321d478da324d3b98e736b6c42415c02d506cfb653fYirui Huang return 1; 322d478da324d3b98e736b6c42415c02d506cfb653fYirui Huang } 323f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka return 0; 324f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka} 325f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka 326f3afe92def0fff022889fd036d68451223aac146Seigo NonakaFontLanguages::FontLanguages(std::vector<FontLanguage>&& languages) 327f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka : mLanguages(std::move(languages)) { 328f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka if (mLanguages.empty()) { 329f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka return; 330f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka } 331f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka 332f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka const FontLanguage& lang = mLanguages[0]; 333f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka 334f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka mIsAllTheSameLanguage = true; 335f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka mUnionOfSubScriptBits = lang.mSubScriptBits; 336f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka for (size_t i = 1; i < mLanguages.size(); ++i) { 337f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka mUnionOfSubScriptBits |= mLanguages[i].mSubScriptBits; 338f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka if (mIsAllTheSameLanguage && lang.mLanguage != mLanguages[i].mLanguage) { 339f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka mIsAllTheSameLanguage = false; 340f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka } 3416f9966ea7c1910fd780cf7779cc59701c9b98a2bSeigo Nonaka } 
342198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka} 343198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka 344198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka#undef SCRIPT_TAG 34514e2d136aaef271ba131f917cf5f27baa31ae5adSeigo Nonaka} // namespace minikin 346