1198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka/* 2198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka * Copyright (C) 2015 The Android Open Source Project 3198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka * 4198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka * Licensed under the Apache License, Version 2.0 (the "License"); 5198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka * you may not use this file except in compliance with the License. 6198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka * You may obtain a copy of the License at 7198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka * 8198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka * http://www.apache.org/licenses/LICENSE-2.0 9198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka * 10198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka * Unless required by applicable law or agreed to in writing, software 11198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka * distributed under the License is distributed on an "AS IS" BASIS, 12198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka * See the License for the specific language governing permissions and 14198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka * limitations under the License. 15198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka */ 16198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka 17198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka#define LOG_TAG "Minikin" 18198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka 19198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka#include "FontLanguage.h" 20198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka 21198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka#include <hb.h> 22198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka#include <unicode/uloc.h> 23198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka 24198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonakanamespace android { 25198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka 26198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka#define SCRIPT_TAG(c1, c2, c3, c4) \ 27198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka (((uint32_t)(c1)) << 24 | ((uint32_t)(c2)) << 16 | ((uint32_t)(c3)) << 8 | \ 28198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka ((uint32_t)(c4))) 29198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka 30198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka// Parse BCP 47 language identifier into internal structure 31198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo NonakaFontLanguage::FontLanguage(const char* buf, size_t length) : FontLanguage() { 32198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka size_t i; 33198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka for (i = 0; i < length; i++) { 34198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka char c = buf[i]; 35198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka if (c == '-' || c == '_') break; 36198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka } 37198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka if (i == 2 || i == 3) { // only accept two or three letter language code. 38198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka mLanguage = buf[0] | (buf[1] << 8) | ((i == 3) ? (buf[2] << 16) : 0); 39198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka } else { 40198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka // We don't understand anything other than two-letter or three-letter 41198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka // language codes, so we skip parsing the rest of the string. 42198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka mLanguage = 0ul; 43198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka return; 44198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka } 45198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka 46198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka size_t next; 47198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka for (i++; i < length; i = next + 1) { 48198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka for (next = i; next < length; next++) { 49198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka char c = buf[next]; 50198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka if (c == '-' || c == '_') break; 51198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka } 52198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka if (next - i == 4 && 'A' <= buf[i] && buf[i] <= 'Z') { 53198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka mScript = SCRIPT_TAG(buf[i], buf[i + 1], buf[i + 2], buf[i + 3]); 54198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka } 55198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka } 56198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka 57198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka mSubScriptBits = scriptToSubScriptBits(mScript); 58198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka} 59198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka 60198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka//static 61198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonakauint8_t FontLanguage::scriptToSubScriptBits(uint32_t script) { 62198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka uint8_t subScriptBits = 0u; 63198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka switch (script) { 64533a01ea8438bb102b0dbc71f6c4ef356b260ed5Seigo Nonaka case SCRIPT_TAG('B', 'o', 'p', 'o'): 65533a01ea8438bb102b0dbc71f6c4ef356b260ed5Seigo Nonaka subScriptBits = kBopomofoFlag; 66533a01ea8438bb102b0dbc71f6c4ef356b260ed5Seigo Nonaka break; 67198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka case SCRIPT_TAG('H', 'a', 'n', 'g'): 68198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka subScriptBits = kHangulFlag; 69198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka break; 70533a01ea8438bb102b0dbc71f6c4ef356b260ed5Seigo Nonaka case SCRIPT_TAG('H', 'a', 'n', 'b'): 71533a01ea8438bb102b0dbc71f6c4ef356b260ed5Seigo Nonaka // Bopomofo is almost exclusively used in Taiwan. 72533a01ea8438bb102b0dbc71f6c4ef356b260ed5Seigo Nonaka subScriptBits = kHanFlag | kBopomofoFlag; 73533a01ea8438bb102b0dbc71f6c4ef356b260ed5Seigo Nonaka break; 74198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka case SCRIPT_TAG('H', 'a', 'n', 'i'): 75198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka subScriptBits = kHanFlag; 76198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka break; 77198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka case SCRIPT_TAG('H', 'a', 'n', 's'): 78198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka subScriptBits = kHanFlag | kSimplifiedChineseFlag; 79198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka break; 80198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka case SCRIPT_TAG('H', 'a', 'n', 't'): 81198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka subScriptBits = kHanFlag | kTraditionalChineseFlag; 82198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka break; 83198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka case SCRIPT_TAG('H', 'i', 'r', 'a'): 84198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka subScriptBits = kHiraganaFlag; 85198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka break; 86198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka case SCRIPT_TAG('H', 'r', 'k', 't'): 87198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka subScriptBits = kKatakanaFlag | kHiraganaFlag; 88198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka break; 89198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka case SCRIPT_TAG('J', 'p', 'a', 'n'): 90198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka subScriptBits = kHanFlag | kKatakanaFlag | kHiraganaFlag; 91198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka break; 92198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka case SCRIPT_TAG('K', 'a', 'n', 'a'): 93198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka subScriptBits = kKatakanaFlag; 94198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka break; 95198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka case SCRIPT_TAG('K', 'o', 'r', 'e'): 96198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka subScriptBits = kHanFlag | kHangulFlag; 97198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka break; 98d3376522332e1e016e59fabb22c24025092c724dSeigo Nonaka case SCRIPT_TAG('Z', 's', 'y', 'e'): 99198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka subScriptBits = kEmojiFlag; 100198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka break; 101198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka } 102198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka return subScriptBits; 103198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka} 104198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka 105198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonakastd::string FontLanguage::getString() const { 106198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka if (mLanguage == 0ul) { 107198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka return "und"; 108198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka } 109198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka char buf[16]; 110198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka size_t i = 0; 111198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka buf[i++] = mLanguage & 0xFF ; 112198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka buf[i++] = (mLanguage >> 8) & 0xFF; 113198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka char third_letter = (mLanguage >> 16) & 0xFF; 114198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka if (third_letter != 0) buf[i++] = third_letter; 115198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka if (mScript != 0) { 116198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka buf[i++] = '-'; 117198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka buf[i++] = (mScript >> 24) & 0xFFu; 118198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka buf[i++] = (mScript >> 16) & 0xFFu; 119198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka buf[i++] = (mScript >> 8) & 0xFFu; 120198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka buf[i++] = mScript & 0xFFu; 121198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka } 122198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka return std::string(buf, i); 123198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka} 124198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka 1256f9966ea7c1910fd780cf7779cc59701c9b98a2bSeigo Nonakabool FontLanguage::isEqualScript(const FontLanguage& other) const { 126198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka return other.mScript == mScript; 127198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka} 128198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka 129f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka// static 130f3afe92def0fff022889fd036d68451223aac146Seigo Nonakabool FontLanguage::supportsScript(uint8_t providedBits, uint8_t requestedBits) { 131f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka return requestedBits != 0 && (providedBits & requestedBits) == requestedBits; 1326f9966ea7c1910fd780cf7779cc59701c9b98a2bSeigo Nonaka} 1336f9966ea7c1910fd780cf7779cc59701c9b98a2bSeigo Nonaka 134198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonakabool FontLanguage::supportsHbScript(hb_script_t script) const { 135198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka static_assert(SCRIPT_TAG('J', 'p', 'a', 'n') == HB_TAG('J', 'p', 'a', 'n'), 136198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka "The Minikin script and HarfBuzz hb_script_t have different encodings."); 137198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka if (script == mScript) return true; 138f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka return supportsScript(mSubScriptBits, scriptToSubScriptBits(script)); 139198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka} 140198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka 141f3afe92def0fff022889fd036d68451223aac146Seigo Nonakaint FontLanguage::calcScoreFor(const FontLanguages& supported) const { 142f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka int score = 0; 143f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka for (size_t i = 0; i < supported.size(); ++i) { 144f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka if (isEqualScript(supported[i]) || 145f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka supportsScript(supported[i].mSubScriptBits, mSubScriptBits)) { 146f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka if (mLanguage == supported[i].mLanguage) { 147f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka return 2; 148f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka } else { 149f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka score = 1; 150f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka } 151f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka } 152f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka } 153f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka 154f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka if (score == 1) { 155f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka return score; 156f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka } 157f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka 158f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka if (supportsScript(supported.getUnionOfSubScriptBits(), mSubScriptBits)) { 159f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka // Gives score of 2 only if the language matches all of the font languages except for the 160f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka // exact match case handled above. 161f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka return (mLanguage == supported[0].mLanguage && supported.isAllTheSameLanguage()) ? 2 : 1; 162f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka } 163f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka 164f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka return 0; 165f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka} 166f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka 167f3afe92def0fff022889fd036d68451223aac146Seigo NonakaFontLanguages::FontLanguages(std::vector<FontLanguage>&& languages) 168f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka : mLanguages(std::move(languages)) { 169f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka if (mLanguages.empty()) { 170f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka return; 171f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka } 172f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka 173f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka const FontLanguage& lang = mLanguages[0]; 174f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka 175f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka mIsAllTheSameLanguage = true; 176f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka mUnionOfSubScriptBits = lang.mSubScriptBits; 177f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka for (size_t i = 1; i < mLanguages.size(); ++i) { 178f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka mUnionOfSubScriptBits |= mLanguages[i].mSubScriptBits; 179f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka if (mIsAllTheSameLanguage && lang.mLanguage != mLanguages[i].mLanguage) { 180f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka mIsAllTheSameLanguage = false; 181f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka } 1826f9966ea7c1910fd780cf7779cc59701c9b98a2bSeigo Nonaka } 183198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka} 184198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka 185198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka#undef SCRIPT_TAG 186198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka} // namespace android 187