16d9dcd2cf3d3ed26a886e02d94c907311e7b1f83Seigo Nonaka/* 26d9dcd2cf3d3ed26a886e02d94c907311e7b1f83Seigo Nonaka * Copyright (C) 2015 The Android Open Source Project 36d9dcd2cf3d3ed26a886e02d94c907311e7b1f83Seigo Nonaka * 46d9dcd2cf3d3ed26a886e02d94c907311e7b1f83Seigo Nonaka * Licensed under the Apache License, Version 2.0 (the "License"); 56d9dcd2cf3d3ed26a886e02d94c907311e7b1f83Seigo Nonaka * you may not use this file except in compliance with the License. 66d9dcd2cf3d3ed26a886e02d94c907311e7b1f83Seigo Nonaka * You may obtain a copy of the License at 76d9dcd2cf3d3ed26a886e02d94c907311e7b1f83Seigo Nonaka * 86d9dcd2cf3d3ed26a886e02d94c907311e7b1f83Seigo Nonaka * http://www.apache.org/licenses/LICENSE-2.0 96d9dcd2cf3d3ed26a886e02d94c907311e7b1f83Seigo Nonaka * 106d9dcd2cf3d3ed26a886e02d94c907311e7b1f83Seigo Nonaka * Unless required by applicable law or agreed to in writing, software 116d9dcd2cf3d3ed26a886e02d94c907311e7b1f83Seigo Nonaka * distributed under the License is distributed on an "AS IS" BASIS, 126d9dcd2cf3d3ed26a886e02d94c907311e7b1f83Seigo Nonaka * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 136d9dcd2cf3d3ed26a886e02d94c907311e7b1f83Seigo Nonaka * See the License for the specific language governing permissions and 146d9dcd2cf3d3ed26a886e02d94c907311e7b1f83Seigo Nonaka * limitations under the License. 156d9dcd2cf3d3ed26a886e02d94c907311e7b1f83Seigo Nonaka */ 166d9dcd2cf3d3ed26a886e02d94c907311e7b1f83Seigo Nonaka 176d9dcd2cf3d3ed26a886e02d94c907311e7b1f83Seigo Nonaka#define LOG_TAG "Minikin" 186d9dcd2cf3d3ed26a886e02d94c907311e7b1f83Seigo Nonaka 196d9dcd2cf3d3ed26a886e02d94c907311e7b1f83Seigo Nonaka#include "FontLanguageListCache.h" 206d9dcd2cf3d3ed26a886e02d94c907311e7b1f83Seigo Nonaka 21198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka#include <unicode/uloc.h> 22198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka#include <unordered_set> 236d9dcd2cf3d3ed26a886e02d94c907311e7b1f83Seigo Nonaka 2439ab40115fae6d0c948e435233b3dd997ee7d8e5Mark Salyzyn#include <log/log.h> 25555d84c6f98eafcbe677cdcb8e9605760acd8ce5Mark Salyzyn 26198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka#include "FontLanguage.h" 27555d84c6f98eafcbe677cdcb8e9605760acd8ce5Mark Salyzyn#include "MinikinInternal.h" 286d9dcd2cf3d3ed26a886e02d94c907311e7b1f83Seigo Nonaka 2914e2d136aaef271ba131f917cf5f27baa31ae5adSeigo Nonakanamespace minikin { 306d9dcd2cf3d3ed26a886e02d94c907311e7b1f83Seigo Nonaka 31fd4124c53399581dd94eac5a9749bc07b474a294Seigo Nonakaconst uint32_t FontLanguageListCache::kEmptyListId; 32fd4124c53399581dd94eac5a9749bc07b474a294Seigo Nonaka 33198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka// Returns the text length of output. 34198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonakastatic size_t toLanguageTag(char* output, size_t outSize, const std::string& locale) { 35198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka output[0] = '\0'; 36198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka if (locale.empty()) { 37198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka return 0; 38198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka } 39198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka 40198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka size_t outLength = 0; 41198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka UErrorCode uErr = U_ZERO_ERROR; 42198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka outLength = uloc_canonicalize(locale.c_str(), output, outSize, &uErr); 43198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka if (U_FAILURE(uErr)) { 44198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka // unable to build a proper language identifier 45198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka ALOGD("uloc_canonicalize(\"%s\") failed: %s", locale.c_str(), u_errorName(uErr)); 46198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka output[0] = '\0'; 47198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka return 0; 48198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka } 49198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka 50198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka // Preserve "und" and "und-****" since uloc_addLikelySubtags changes "und" to "en-Latn-US". 51198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka if (strncmp(output, "und", 3) == 0 && 52198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka (outLength == 3 || (outLength == 8 && output[3] == '_'))) { 53198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka return outLength; 54198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka } 55198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka 56198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka char likelyChars[ULOC_FULLNAME_CAPACITY]; 57198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka uErr = U_ZERO_ERROR; 58198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka uloc_addLikelySubtags(output, likelyChars, ULOC_FULLNAME_CAPACITY, &uErr); 59198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka if (U_FAILURE(uErr)) { 60198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka // unable to build a proper language identifier 61198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka ALOGD("uloc_addLikelySubtags(\"%s\") failed: %s", output, u_errorName(uErr)); 62198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka output[0] = '\0'; 63198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka return 0; 64198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka } 65198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka 66198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka uErr = U_ZERO_ERROR; 67198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka outLength = uloc_toLanguageTag(likelyChars, output, outSize, FALSE, &uErr); 68198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka if (U_FAILURE(uErr)) { 69198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka // unable to build a proper language identifier 70198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka ALOGD("uloc_toLanguageTag(\"%s\") failed: %s", likelyChars, u_errorName(uErr)); 71198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka output[0] = '\0'; 72198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka return 0; 73198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka } 74198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka#ifdef VERBOSE_DEBUG 75198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka ALOGD("ICU normalized '%s' to '%s'", locale.c_str(), output); 76198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka#endif 77198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka return outLength; 78198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka} 79198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka 80f3afe92def0fff022889fd036d68451223aac146Seigo Nonakastatic std::vector<FontLanguage> parseLanguageList(const std::string& input) { 81f3afe92def0fff022889fd036d68451223aac146Seigo Nonaka std::vector<FontLanguage> result; 82198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka size_t currentIdx = 0; 83198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka size_t commaLoc = 0; 84198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka char langTag[ULOC_FULLNAME_CAPACITY]; 85198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka std::unordered_set<uint64_t> seen; 86198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka std::string locale(input.size(), 0); 87198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka 88198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka while ((commaLoc = input.find_first_of(',', currentIdx)) != std::string::npos) { 89198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka locale.assign(input, currentIdx, commaLoc - currentIdx); 90198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka currentIdx = commaLoc + 1; 91198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka size_t length = toLanguageTag(langTag, ULOC_FULLNAME_CAPACITY, locale); 92198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka FontLanguage lang(langTag, length); 93198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka uint64_t identifier = lang.getIdentifier(); 94198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka if (!lang.isUnsupported() && seen.count(identifier) == 0) { 95198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka result.push_back(lang); 966f9966ea7c1910fd780cf7779cc59701c9b98a2bSeigo Nonaka if (result.size() == FONT_LANGUAGES_LIMIT) { 976f9966ea7c1910fd780cf7779cc59701c9b98a2bSeigo Nonaka break; 986f9966ea7c1910fd780cf7779cc59701c9b98a2bSeigo Nonaka } 99198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka seen.insert(identifier); 100198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka } 101198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka } 1026f9966ea7c1910fd780cf7779cc59701c9b98a2bSeigo Nonaka if (result.size() < FONT_LANGUAGES_LIMIT) { 1036f9966ea7c1910fd780cf7779cc59701c9b98a2bSeigo Nonaka locale.assign(input, currentIdx, input.size() - currentIdx); 1046f9966ea7c1910fd780cf7779cc59701c9b98a2bSeigo Nonaka size_t length = toLanguageTag(langTag, ULOC_FULLNAME_CAPACITY, locale); 1056f9966ea7c1910fd780cf7779cc59701c9b98a2bSeigo Nonaka FontLanguage lang(langTag, length); 1066f9966ea7c1910fd780cf7779cc59701c9b98a2bSeigo Nonaka uint64_t identifier = lang.getIdentifier(); 1076f9966ea7c1910fd780cf7779cc59701c9b98a2bSeigo Nonaka if (!lang.isUnsupported() && seen.count(identifier) == 0) { 1086f9966ea7c1910fd780cf7779cc59701c9b98a2bSeigo Nonaka result.push_back(lang); 1096f9966ea7c1910fd780cf7779cc59701c9b98a2bSeigo Nonaka } 110198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka } 111198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka return result; 112198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka} 113198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka 114fd4124c53399581dd94eac5a9749bc07b474a294Seigo Nonaka// static 115fd4124c53399581dd94eac5a9749bc07b474a294Seigo Nonakauint32_t FontLanguageListCache::getId(const std::string& languages) { 116fd4124c53399581dd94eac5a9749bc07b474a294Seigo Nonaka FontLanguageListCache* inst = FontLanguageListCache::getInstance(); 117fd4124c53399581dd94eac5a9749bc07b474a294Seigo Nonaka std::unordered_map<std::string, uint32_t>::const_iterator it = 118fd4124c53399581dd94eac5a9749bc07b474a294Seigo Nonaka inst->mLanguageListLookupTable.find(languages); 119fd4124c53399581dd94eac5a9749bc07b474a294Seigo Nonaka if (it != inst->mLanguageListLookupTable.end()) { 120fd4124c53399581dd94eac5a9749bc07b474a294Seigo Nonaka return it->second; 1216d9dcd2cf3d3ed26a886e02d94c907311e7b1f83Seigo Nonaka } 1226d9dcd2cf3d3ed26a886e02d94c907311e7b1f83Seigo Nonaka 123fd4124c53399581dd94eac5a9749bc07b474a294Seigo Nonaka // Given language list is not in cache. Insert it and return newly assigned ID. 124fd4124c53399581dd94eac5a9749bc07b474a294Seigo Nonaka const uint32_t nextId = inst->mLanguageLists.size(); 125fd4124c53399581dd94eac5a9749bc07b474a294Seigo Nonaka FontLanguages fontLanguages(parseLanguageList(languages)); 126fd4124c53399581dd94eac5a9749bc07b474a294Seigo Nonaka if (fontLanguages.empty()) { 127fd4124c53399581dd94eac5a9749bc07b474a294Seigo Nonaka return kEmptyListId; 128198b46f1fea3f47ef8eb6317799c0d77aaec52f6Seigo Nonaka } 129fd4124c53399581dd94eac5a9749bc07b474a294Seigo Nonaka inst->mLanguageLists.push_back(std::move(fontLanguages)); 130fd4124c53399581dd94eac5a9749bc07b474a294Seigo Nonaka inst->mLanguageListLookupTable.insert(std::make_pair(languages, nextId)); 131fd4124c53399581dd94eac5a9749bc07b474a294Seigo Nonaka return nextId; 1326d9dcd2cf3d3ed26a886e02d94c907311e7b1f83Seigo Nonaka} 1336d9dcd2cf3d3ed26a886e02d94c907311e7b1f83Seigo Nonaka 1346d9dcd2cf3d3ed26a886e02d94c907311e7b1f83Seigo Nonaka// static 135fd4124c53399581dd94eac5a9749bc07b474a294Seigo Nonakaconst FontLanguages& FontLanguageListCache::getById(uint32_t id) { 136fd4124c53399581dd94eac5a9749bc07b474a294Seigo Nonaka FontLanguageListCache* inst = FontLanguageListCache::getInstance(); 137fd4124c53399581dd94eac5a9749bc07b474a294Seigo Nonaka LOG_ALWAYS_FATAL_IF(id >= inst->mLanguageLists.size(), "Lookup by unknown language list ID."); 138fd4124c53399581dd94eac5a9749bc07b474a294Seigo Nonaka return inst->mLanguageLists[id]; 1396d9dcd2cf3d3ed26a886e02d94c907311e7b1f83Seigo Nonaka} 1406d9dcd2cf3d3ed26a886e02d94c907311e7b1f83Seigo Nonaka 1416d9dcd2cf3d3ed26a886e02d94c907311e7b1f83Seigo Nonaka// static 142fd4124c53399581dd94eac5a9749bc07b474a294Seigo NonakaFontLanguageListCache* FontLanguageListCache::getInstance() { 143fd4124c53399581dd94eac5a9749bc07b474a294Seigo Nonaka assertMinikinLocked(); 144fd4124c53399581dd94eac5a9749bc07b474a294Seigo Nonaka static FontLanguageListCache* instance = nullptr; 145fd4124c53399581dd94eac5a9749bc07b474a294Seigo Nonaka if (instance == nullptr) { 146fd4124c53399581dd94eac5a9749bc07b474a294Seigo Nonaka instance = new FontLanguageListCache(); 147fd4124c53399581dd94eac5a9749bc07b474a294Seigo Nonaka 148fd4124c53399581dd94eac5a9749bc07b474a294Seigo Nonaka // Insert an empty language list for mapping default language list to kEmptyListId. 149fd4124c53399581dd94eac5a9749bc07b474a294Seigo Nonaka // The default language list has only one FontLanguage and it is the unsupported language. 150fd4124c53399581dd94eac5a9749bc07b474a294Seigo Nonaka instance->mLanguageLists.push_back(FontLanguages()); 151fd4124c53399581dd94eac5a9749bc07b474a294Seigo Nonaka instance->mLanguageListLookupTable.insert(std::make_pair("", kEmptyListId)); 152fd4124c53399581dd94eac5a9749bc07b474a294Seigo Nonaka } 153fd4124c53399581dd94eac5a9749bc07b474a294Seigo Nonaka return instance; 1546d9dcd2cf3d3ed26a886e02d94c907311e7b1f83Seigo Nonaka} 1556d9dcd2cf3d3ed26a886e02d94c907311e7b1f83Seigo Nonaka 15614e2d136aaef271ba131f917cf5f27baa31ae5adSeigo Nonaka} // namespace minikin 157